From 9100bd772d4ff153fd2d5cb13034f4ed8ea2d477 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 29 Sep 2020 15:32:53 +0700 Subject: [PATCH 001/544] [SCEV][NFC] Introduce isBasicBlockEntryGuardedByCond Currently, we have `isLoopEntryGuardedByCond` method in SCEV, which checks that some fact is true if we enter the loop. In fact, this is just a particular case of more general concept `isBasicBlockEntryGuardedByCond` applied to given loop's header. In fact, the logic if this code is largely independent on the given loop and only cares code above it. This patch makes this generalization. Now we can query it for any block, and `isBasicBlockEntryGuardedByCond` is just a particular case. Differential Revision: https://reviews.llvm.org/D87828 Reviewed By: fhahn --- llvm/include/llvm/Analysis/ScalarEvolution.h | 6 +++ llvm/lib/Analysis/ScalarEvolution.cpp | 53 ++++++++++++-------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index f2cf8fc102b13..4fc1ee08caf7d 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -677,6 +677,12 @@ class ScalarEvolution { bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Test whether entry to the basic block is protected by a conditional + /// between LHS and RHS. + bool isBasicBlockEntryGuardedByCond(const BasicBlock *BB, + ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + /// Test whether the backedge of the loop is protected by a conditional /// between LHS and RHS. This is used to eliminate casts. bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e6ece25c12f22..756710909ac79 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9492,24 +9492,14 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return false; } -bool -ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS) { - // Interpret a null as meaning no loop, where there is obviously no guard - // (interprocedural conditions notwithstanding). - if (!L) return false; - +bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, + ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { if (VerifyIR) - assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) && + assert(!verifyFunction(*BB->getParent(), &dbgs()) && "This cannot be done on broken IR!"); - // Both LHS and RHS must be available at loop entry. - assert(isAvailableAtLoopEntry(LHS, L) && - "LHS is not available at Loop Entry"); - assert(isAvailableAtLoopEntry(RHS, L) && - "RHS is not available at Loop Entry"); - if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) return true; @@ -9566,13 +9556,17 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; }; - // Starting at the loop predecessor, climb up the predecessor chain, as long + // Starting at the block's predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors - // leading to the original header. - for (std::pair Pair( - L->getLoopPredecessor(), L->getHeader()); + // leading to the original block. + const Loop *ContainingLoop = LI.getLoopFor(BB); + const BasicBlock *PredBB; + if (ContainingLoop && ContainingLoop->getHeader() == BB) + PredBB = ContainingLoop->getLoopPredecessor(); + else + PredBB = BB->getSinglePredecessor(); + for (std::pair Pair(PredBB, BB); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { - if (ProveViaGuard(Pair.first)) return true; @@ -9592,7 +9586,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, if (!AssumeVH) continue; auto *CI = cast(AssumeVH); - if (!DT.dominates(CI, L->getHeader())) + if (!DT.dominates(CI, BB)) continue; if (ProveViaCond(CI->getArgOperand(0), false)) @@ -9602,6 +9596,23 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; } +bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) + return false; + + // Both LHS and RHS must be available at loop entry. + assert(isAvailableAtLoopEntry(LHS, L) && + "LHS is not available at Loop Entry"); + assert(isAvailableAtLoopEntry(RHS, L) && + "RHS is not available at Loop Entry"); + return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS); +} + bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Value *FoundCondValue, bool Inverse) { From 60b852092c98dbdc6248d60109d90ae6f8ad841c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Sep 2020 10:38:44 +0100 Subject: [PATCH 002/544] [LoopDeletion] Forget loop before setting values to undef After D71539, we need to forget the loop before setting the incoming values of phi nodes in exit blocks, because we are looking through those phi nodes now and the SCEV expression could depend on the loop phi. If we update the phi nodes before forgetting the loop, we miss those users during invalidation. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D88167 --- llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 4 + llvm/lib/Transforms/Utils/LoopUtils.cpp | 3 + .../Transforms/LoopDeletion/update-scev.ll | 114 +++++++++++++++++- 3 files changed, 120 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index d9cde031cc604..76ba2f58e8507 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -165,6 +165,10 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, if (ExitBlock && isLoopNeverExecuted(L)) { LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); + // We need to forget the loop before setting the incoming values of the exit + // phis to undef, so we properly invalidate the SCEV expressions for those + // phis. + SE.forgetLoop(L); // Set incoming value to undef for phi nodes in the exit block. for (PHINode &P : ExitBlock->phis()) { std::fill(P.incoming_values().begin(), P.incoming_values().end(), diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index d7cd9b19b8d51..f2496c4a1ea19 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -724,6 +724,9 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } LI->destroy(L); } + + if (SE) + SE->verify(); } /// Checks if \p L has single exit through latch block except possibly diff --git a/llvm/test/Transforms/LoopDeletion/update-scev.ll b/llvm/test/Transforms/LoopDeletion/update-scev.ll index 641ba55ed8f60..528a00a93f66b 100644 --- a/llvm/test/Transforms/LoopDeletion/update-scev.ll +++ b/llvm/test/Transforms/LoopDeletion/update-scev.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -analyze -scalar-evolution -loop-deletion -scalar-evolution < %s | FileCheck %s --check-prefix=SCEV-EXPRS +; RUN: opt -S -analyze -scalar-evolution -loop-deletion -scalar-evolution -verify-scev < %s | FileCheck %s --check-prefix=SCEV-EXPRS ; RUN: opt -S -loop-deletion < %s | FileCheck %s --check-prefix=IR-AFTER-TRANSFORM ; RUN: opt -S -indvars -loop-deletion -indvars < %s | FileCheck %s --check-prefix=ORIGINAL-CRASH @@ -54,3 +54,115 @@ for.inc11: ; preds = %for.body6 for.cond14: ; preds = %for.cond14, %for.inc11 br i1 undef, label %for.cond, label %for.cond14 } + +; LoopDeletion removes the loop %for.body7.1. Make sure %inc.lcssa.1 in the loop +; exit block is correctly invalidated. + +define void @test2(double* %bx, i64 %by) local_unnamed_addr align 2 { +; IR-AFTER-TRANSFORM-LABEL: @test2( +; IR-AFTER-TRANSFORM-NOT: for.body7.1: + +; SCEV-EXPRS-LABEL: test2 +; SCEV-EXPRS: %inc.lcssa.1 = phi i64 [ undef, %for.body7.preheader.1 ] +; SCEV-EXPRS-NEXT: --> undef +entry: + %cmp = icmp sgt i64 %by, 0 + br label %for.cond.preheader + +for.cond.preheader: ; preds = %entry + br i1 %cmp, label %for.cond5.preheader.lr.ph, label %for.end14 + +for.cond5.preheader.lr.ph: ; preds = %for.cond.preheader + br label %for.cond5.preheader + +for.cond.loopexit.loopexit: ; preds = %for.body10 + %inc11.lcssa = phi i64 [ %inc11, %for.body10 ] + br label %for.cond.loopexit + +for.cond.loopexit: ; preds = %for.cond8.preheader, %for.cond.loopexit.loopexit + %ca.3.lcssa = phi i64 [ %ca.2.lcssa, %for.cond8.preheader ], [ %inc11.lcssa, %for.cond.loopexit.loopexit ] + br i1 %cmp, label %for.cond5.preheader, label %for.end14.loopexit + +for.cond5.preheader: ; preds = %for.cond.loopexit, %for.cond5.preheader.lr.ph + %ca.19 = phi i64 [ 0, %for.cond5.preheader.lr.ph ], [ %ca.3.lcssa, %for.cond.loopexit ] + br i1 false, label %for.cond8.preheader, label %for.body7.preheader + +for.body7.preheader: ; preds = %for.cond5.preheader + br label %for.body7 + +for.cond8.preheader.loopexit: ; preds = %for.body7 + %inc.lcssa = phi i64 [ %inc, %for.body7 ] + br label %for.cond8.preheader + +for.cond8.preheader: ; preds = %for.cond8.preheader.loopexit, %for.cond5.preheader + %ca.2.lcssa = phi i64 [ %ca.19, %for.cond5.preheader ], [ %inc.lcssa, %for.cond8.preheader.loopexit ] + br i1 true, label %for.body10.preheader, label %for.cond.loopexit + +for.body10.preheader: ; preds = %for.cond8.preheader + br label %for.body10 + +for.body7: ; preds = %for.body7, %for.body7.preheader + %ca.26 = phi i64 [ %inc, %for.body7 ], [ %ca.19, %for.body7.preheader ] + %inc = add nsw i64 %ca.26, 1 + %arrayidx = getelementptr inbounds double, double* %bx, i64 %ca.26 + store double 0.000000e+00, double* %arrayidx, align 8 + br i1 false, label %for.cond8.preheader.loopexit, label %for.body7 + +for.body10: ; preds = %for.body10, %for.body10.preheader + %ca.37 = phi i64 [ %inc11, %for.body10 ], [ %ca.2.lcssa, %for.body10.preheader ] + %inc11 = add nsw i64 %ca.37, 1 + br i1 true, label %for.body10, label %for.cond.loopexit.loopexit + +for.end14.loopexit: ; preds = %for.cond.loopexit + br label %for.end14 + +for.end14: ; preds = %for.end14.loopexit, %for.cond.preheader + br i1 %cmp, label %for.cond5.preheader.lr.ph.1, label %for.end14.1 + +for.cond5.preheader.lr.ph.1: ; preds = %for.end14 + br label %for.cond5.preheader.1 + +for.cond5.preheader.1: ; preds = %for.cond.loopexit.1, %for.cond5.preheader.lr.ph.1 + %ca.19.1 = phi i64 [ 0, %for.cond5.preheader.lr.ph.1 ], [ %ca.3.lcssa.1, %for.cond.loopexit.1 ] + br i1 true, label %for.cond8.preheader.1, label %for.body7.preheader.1 + +for.body7.preheader.1: ; preds = %for.cond5.preheader.1 + br label %for.body7.1 + +for.body7.1: ; preds = %for.body7.1, %for.body7.preheader.1 + %ca.26.1 = phi i64 [ %inc.1, %for.body7.1 ], [ %ca.19.1, %for.body7.preheader.1 ] + %inc.1 = add nsw i64 %ca.26.1, 1 + %arrayidx.1 = getelementptr inbounds double, double* %bx, i64 %ca.26.1 + store double 0.000000e+00, double* %arrayidx.1, align 8 + br i1 true, label %for.cond8.preheader.loopexit.1, label %for.body7.1 + +for.cond8.preheader.loopexit.1: ; preds = %for.body7.1 + %inc.lcssa.1 = phi i64 [ %inc.1, %for.body7.1 ] + br label %for.cond8.preheader.1 + +for.cond8.preheader.1: ; preds = %for.cond8.preheader.loopexit.1, %for.cond5.preheader.1 + %ca.2.lcssa.1 = phi i64 [ %ca.19.1, %for.cond5.preheader.1 ], [ %inc.lcssa.1, %for.cond8.preheader.loopexit.1 ] + br i1 false, label %for.body10.preheader.1, label %for.cond.loopexit.1 + +for.body10.preheader.1: ; preds = %for.cond8.preheader.1 + br label %for.body10.1 + +for.body10.1: ; preds = %for.body10.1, %for.body10.preheader.1 + %ca.37.1 = phi i64 [ %inc11.1, %for.body10.1 ], [ %ca.2.lcssa.1, %for.body10.preheader.1 ] + %inc11.1 = add nsw i64 %ca.37.1, 1 + br i1 false, label %for.body10.1, label %for.cond.loopexit.loopexit.1 + +for.cond.loopexit.loopexit.1: ; preds = %for.body10.1 + %inc11.lcssa.1 = phi i64 [ %inc11.1, %for.body10.1 ] + br label %for.cond.loopexit.1 + +for.cond.loopexit.1: ; preds = %for.cond.loopexit.loopexit.1, %for.cond8.preheader.1 + %ca.3.lcssa.1 = phi i64 [ %ca.2.lcssa.1, %for.cond8.preheader.1 ], [ %inc11.lcssa.1, %for.cond.loopexit.loopexit.1 ] + br i1 %cmp, label %for.cond5.preheader.1, label %for.end14.loopexit.1 + +for.end14.loopexit.1: ; preds = %for.cond.loopexit.1 + br label %for.end14.1 + +for.end14.1: ; preds = %for.end14.loopexit.1, %for.end14 + ret void +} From 324df2661b3ee38996fab589a2605da458fa249e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 10:56:00 +0100 Subject: [PATCH 003/544] [InstCombine] Add trunc(lshr(sext(x),c)) non-uniform vector tests --- llvm/test/Transforms/InstCombine/cast.ll | 50 ++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index dcd3013621fdc..ad6d22aa06e43 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1546,6 +1546,56 @@ define i8 @trunc_lshr_sext(i8 %A) { ret i8 %D } +define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_sext_uniform( +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: ret <2 x i8> [[D]] +; + %B = sext <2 x i8> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + +define <2 x i8> @trunc_lshr_sext_uniform_undef(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_sext_uniform_undef( +; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: ret <2 x i8> [[D]] +; + %B = sext <2 x i8> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + +define <2 x i8> @trunc_lshr_sext_nonuniform(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_sext_nonuniform( +; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: ret <2 x i8> [[D]] +; + %B = sext <2 x i8> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + +define <3 x i8> @trunc_lshr_sext_nonuniform_undef(<3 x i8> %A) { +; ALL-LABEL: @trunc_lshr_sext_nonuniform_undef( +; ALL-NEXT: [[B:%.*]] = sext <3 x i8> [[A:%.*]] to <3 x i32> +; ALL-NEXT: [[C:%.*]] = lshr <3 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = trunc <3 x i32> [[C]] to <3 x i8> +; ALL-NEXT: ret <3 x i8> [[D]] +; + %B = sext <3 x i8> %A to <3 x i32> + %C = lshr <3 x i32> %B, + %D = trunc <3 x i32> %C to <3 x i8> + ret <3 x i8> %D +} + define <2 x i8> @trunc_lshr_sext_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> From cb9cfa0d2fddad97828fc46ddcbf86f6b9bf7d6d Mon Sep 17 00:00:00 2001 From: sstefan1 Date: Tue, 29 Sep 2020 11:51:36 +0200 Subject: [PATCH 004/544] [OpenMPOpt][Fix] Only initialize ICV initial values once. Reviewers: jdoerfert, ggeorgakoudis Differential Revision: https://reviews.llvm.org/D88441 --- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 6 +++--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 16 ++++++++++++++++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 3 ++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index f612fb3cd948e..3ad13ddc51369 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -41,12 +41,12 @@ enum class InternalControlVar { #include "llvm/Frontend/OpenMP/OMPKinds.def" enum class ICVInitValue { -#define ICV_DATA_ENV(Enum, Name, EnvVar, Init) Init, +#define ICV_INIT_VALUE(Enum, Name) Enum, #include "llvm/Frontend/OpenMP/OMPKinds.def" }; -#define ICV_DATA_ENV(Enum, Name, EnvVar, Init) \ - constexpr auto Init = omp::ICVInitValue::Init; +#define ICV_INIT_VALUE(Enum, Name) \ + constexpr auto Enum = omp::ICVInitValue::Enum; #include "llvm/Frontend/OpenMP/OMPKinds.def" /// IDs for all omp runtime library (RTL) functions. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index f74f036c311b7..e93f836ea3fad 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -230,6 +230,20 @@ __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /// ///{ +#ifndef ICV_INIT_VALUE +#define ICV_INIT_VALUE(Enum, Name) +#endif + +#define __ICV_INIT_VALUE(Name) ICV_INIT_VALUE(ICV_##Name, #Name) + +__ICV_INIT_VALUE(ZERO) +__ICV_INIT_VALUE(FALSE) +__ICV_INIT_VALUE(IMPLEMENTATION_DEFINED) +__ICV_INIT_VALUE(LAST) + +#undef __ICV_INIT_VALUE +#undef ICV_INIT_VALUE + #ifndef ICV_DATA_ENV #define ICV_DATA_ENV(Enum, Name, EnvVarName, Init) #endif @@ -240,6 +254,7 @@ __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, __ICV_DATA_ENV(nthreads, OMP_NUM_THREADS, ICV_IMPLEMENTATION_DEFINED) __ICV_DATA_ENV(active_levels, NONE, ICV_ZERO) __ICV_DATA_ENV(cancel, OMP_CANCELLATION, ICV_FALSE) +__ICV_DATA_ENV(proc_bind, OMP_PROC_BIND, ICV_IMPLEMENTATION_DEFINED) __ICV_DATA_ENV(__last, last, ICV_LAST) #undef __ICV_DATA_ENV @@ -265,6 +280,7 @@ __ICV_RT_SET(nthreads, omp_set_num_threads) __ICV_RT_GET(nthreads, omp_get_max_threads) __ICV_RT_GET(active_levels, omp_get_active_level) __ICV_RT_GET(cancel, omp_get_cancellation) +__ICV_RT_GET(proc_bind, omp_get_proc_bind) #undef __ICV_RT_GET #undef ICV_RT_GET diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 66928ebe8c9ee..d372f108e3d40 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -518,7 +518,8 @@ struct OpenMPOpt { /// Print initial ICV values for testing. /// FIXME: This should be done from the Attributor once it is added. void printICVs() const { - InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; + InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, + ICV_proc_bind}; for (Function *F : OMPInfoCache.ModuleSlice) { for (auto ICV : ICVs) { From 1fd9a146d375c256475bb3e11df8d6539ae3f764 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 03:08:24 -0700 Subject: [PATCH 005/544] [msan] Add test for vector abs intrinsic --- .../MemorySanitizer/abs-vector.ll | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll new file mode 100644 index 0000000000000..c54bb81d59583 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s +; RUN: opt < %s -S -msan-check-access-address=0 -msan | FileCheck %s +; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN +; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x i64> @test_mm256_abs_epi8(<4 x i64> noundef %a) local_unnamed_addr #0 { +; CHECK-LABEL: @test_mm256_abs_epi8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof !2 +; CHECK: call void @__msan_warning_with_origin_noreturn +; CHECK: unreachable +; CHECK: [[TMP7:%.*]] = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[TMP7]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x i64> [[TMP8]] +; +entry: + %0 = bitcast <4 x i64> %a to <32 x i8> + %1 = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> %0, i1 false) + %2 = bitcast <32 x i8> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a) local_unnamed_addr #0 { +; CHECK-LABEL: @test_mm256_abs_epi16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <16 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof !2 +; CHECK: call void @__msan_warning_with_origin_noreturn +; CHECK: unreachable +; CHECK: [[TMP7:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP7]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x i64> [[TMP8]] +; +entry: + %0 = bitcast <4 x i64> %a to <16 x i16> + %1 = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %0, i1 false) + %2 = bitcast <16 x i16> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a) local_unnamed_addr #0 { +; CHECK-LABEL: @test_mm256_abs_epi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof !2 +; CHECK: call void @__msan_warning_with_origin_noreturn +; CHECK: unreachable +; CHECK: [[TMP7:%.*]] = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP7]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x i64> [[TMP8]] +; +entry: + %0 = bitcast <4 x i64> %a to <8 x i32> + %1 = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> %0, i1 false) + %2 = bitcast <8 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) #1 +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) #1 +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1 + +attributes #0 = { nounwind readnone sanitize_memory } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 12.0.0"} From 4aa6abe4efc1b648e7ede290210569ca7a703867 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 03:15:37 -0700 Subject: [PATCH 006/544] [msan] Fix llvm.abs.v intrinsic The last argument of the intrinsic is a boolean flag to control INT_MIN handling and does not affect msan metadata. --- .../Instrumentation/MemorySanitizer.cpp | 5 ++ .../MemorySanitizer/abs-vector.ll | 57 ++++++++----------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index f65d701fe9b07..cd54b6c2cd8f6 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2638,6 +2638,11 @@ struct MemorySanitizerVisitor : public InstVisitor { return false; unsigned NumArgOperands = I.getNumArgOperands(); + if (I.getIntrinsicID() == Intrinsic::abs) { + assert(NumArgOperands == 2); + // The last argument is just a boolean flag. + NumArgOperands = 1; + } for (unsigned i = 0; i < NumArgOperands; ++i) { Type *Ty = I.getArgOperand(i)->getType(); diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll index c54bb81d59583..a8ce0561c3b87 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -12,18 +12,15 @@ define <4 x i64> @test_mm256_abs_epi8(<4 x i64> noundef %a) local_unnamed_addr # ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 -; CHECK: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8> +; CHECK: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof !2 -; CHECK: call void @__msan_warning_with_origin_noreturn -; CHECK: unreachable -; CHECK: [[TMP7:%.*]] = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[TMP7]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 -; ORIGIN-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 -; CHECK: ret <4 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i8> [[TMP4]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x i64> [[TMP6]] ; entry: %0 = bitcast <4 x i64> %a to <32 x i8> @@ -37,18 +34,15 @@ define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a) local_unnamed_addr #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 -; CHECK: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <16 x i16> +; CHECK: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <16 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof !2 -; CHECK: call void @__msan_warning_with_origin_noreturn -; CHECK: unreachable -; CHECK: [[TMP7:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP7]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 -; ORIGIN-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 -; CHECK: ret <4 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x i64> [[TMP6]] ; entry: %0 = bitcast <4 x i64> %a to <16 x i16> @@ -62,18 +56,15 @@ define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a) local_unnamed_addr #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 ; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 -; CHECK: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <8 x i32> +; CHECK: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof !2 -; CHECK: call void @__msan_warning_with_origin_noreturn -; CHECK: unreachable -; CHECK: [[TMP7:%.*]] = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP7]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 -; ORIGIN-NEXT: store i32 0, i32* @__msan_retval_origin_tls, align 4 -; CHECK: ret <4 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64> +; CHECK-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x i64> [[TMP6]] ; entry: %0 = bitcast <4 x i64> %a to <8 x i32> From e6f332ef1e414ec41a188217d7547a371ed975de Mon Sep 17 00:00:00 2001 From: David Stenberg Date: Tue, 29 Sep 2020 11:04:13 +0200 Subject: [PATCH 007/544] [IndVarSimplify] Fix Modified status for removal of overflow intrinsics When removing an overflow intrinsic the Changed status in SimplifyIndvar was not set, leading to the IndVarSimplify pass returning an incorrect status. This was caught using the check introduced by D80916. As pointed out in the code review, a similar bug may exist for eliminateTrunc(). Reviewed By: reames Differential Revision: https://reviews.llvm.org/D85971 --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 1 + .../eliminate-overflow-modified.ll | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 llvm/test/Transforms/IndVarSimplify/eliminate-overflow-modified.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index d3d0c33419085..2d71b0fff8894 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -477,6 +477,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { if (WO->use_empty()) WO->eraseFromParent(); + Changed = true; return true; } diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-overflow-modified.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-overflow-modified.ll new file mode 100644 index 0000000000000..c3aea2621eb96 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-overflow-modified.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -indvars -S -o - | FileCheck %s + +; When eliminating the overflow intrinsic the indvars pass would incorrectly +; return a false Modified status. This was caught by the pass return +; status check that is hidden under EXPENSIVE_CHECKS. + +; CHECK-LABEL: for.body: +; CHECK-NEXT: %0 = phi i16 [ %1, %for.body ], [ undef, %for.body.preheader ] +; CHECK-NEXT: %1 = add nsw i16 %0, -1 +; CHECK-NEXT: %cmp = icmp sgt i16 %1, 0 +; CHECK-NEXT: call void @llvm.assume(i1 %cmp) + +; Function Attrs: nounwind +define void @foo() #0 { +entry: + %cmp1 = icmp sgt i16 undef, 0 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %0 = phi i16 [ %2, %for.body ], [ undef, %for.body.preheader ] + %1 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %0, i16 -1) + %2 = extractvalue { i16, i1 } %1, 0 + %cmp = icmp sgt i16 %2, 0 + call void @llvm.assume(i1 %cmp) + br label %for.body + +for.end: ; preds = %entry + ret void +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) #1 + +; Function Attrs: nounwind willreturn +declare void @llvm.assume(i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { nounwind willreturn } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 12.0.0"} From c5a4900e1a00e88df9d6d9bc39594ff8afd0d9b5 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 29 Sep 2020 13:35:25 +0200 Subject: [PATCH 008/544] [AArch64] Add BTI to CFI jumptables. With branch protection the jump to the jump table entries requires a landing pad. Reviewed By: eugenis, tamas.petz Differential Revision: https://reviews.llvm.org/D81251 --- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 18 ++++++++- .../LowerTypeTests/aarch64-jumptable.ll | 39 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LowerTypeTests/aarch64-jumptable.ll diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 26f5336e0193c..17ffb27efa5a5 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1205,6 +1205,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { static const unsigned kX86JumpTableEntrySize = 8; static const unsigned kARMJumpTableEntrySize = 4; +static const unsigned kARMBTIJumpTableEntrySize = 8; unsigned LowerTypeTestsModule::getJumpTableEntrySize() { switch (Arch) { @@ -1213,7 +1214,12 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { return kX86JumpTableEntrySize; case Triple::arm: case Triple::thumb: + return kARMJumpTableEntrySize; case Triple::aarch64: + if (const auto *BTE = mdconst::extract_or_null( + M.getModuleFlag("branch-target-enforcement"))) + if (BTE->getZExtValue()) + return kARMBTIJumpTableEntrySize; return kARMJumpTableEntrySize; default: report_fatal_error("Unsupported architecture for jump tables"); @@ -1232,7 +1238,13 @@ void LowerTypeTestsModule::createJumpTableEntry( if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) { AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n"; AsmOS << "int3\nint3\nint3\n"; - } else if (JumpTableArch == Triple::arm || JumpTableArch == Triple::aarch64) { + } else if (JumpTableArch == Triple::arm) { + AsmOS << "b $" << ArgIndex << "\n"; + } else if (JumpTableArch == Triple::aarch64) { + if (const auto *BTE = mdconst::extract_or_null( + Dest->getParent()->getModuleFlag("branch-target-enforcement"))) + if (BTE->getZExtValue()) + AsmOS << "bti c\n"; AsmOS << "b $" << ArgIndex << "\n"; } else if (JumpTableArch == Triple::thumb) { AsmOS << "b.w $" << ArgIndex << "\n"; @@ -1394,6 +1406,10 @@ void LowerTypeTestsModule::createJumpTable( // by Clang for -march=armv7. F->addFnAttr("target-cpu", "cortex-a8"); } + if (JumpTableArch == Triple::aarch64) { + F->addFnAttr("branch-target-enforcement", "false"); + F->addFnAttr("sign-return-address", "none"); + } // Make sure we don't emit .eh_frame for this function. F->addFnAttr(Attribute::NoUnwind); diff --git a/llvm/test/Transforms/LowerTypeTests/aarch64-jumptable.ll b/llvm/test/Transforms/LowerTypeTests/aarch64-jumptable.ll new file mode 100644 index 0000000000000..e392c1cc6d2ed --- /dev/null +++ b/llvm/test/Transforms/LowerTypeTests/aarch64-jumptable.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -lowertypetests -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck --check-prefixes=AARCH64 %s + +; Test for the jump table generation with branch protection on AArch64 + +target datalayout = "e-p:64:64" + +@0 = private unnamed_addr constant [2 x void (...)*] [void (...)* bitcast (void ()* @f to void (...)*), void (...)* bitcast (void ()* @g to void (...)*)], align 16 + +; AARCH64: @f = alias void (), void ()* @[[JT:.*]] + +define void @f() !type !0 { + ret void +} + +define internal void @g() !type !0 { + ret void +} + +!0 = !{i32 0, !"typeid1"} + +declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone + +define i1 @foo(i8* %p) { + %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1") + ret i1 %x +} + +!llvm.module.flags = !{!1} + +!1 = !{i32 4, !"branch-target-enforcement", i32 1} + +; AARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 { + +; AARCH64: bti c +; AARCH64-SAME: b $0 +; AARCH64-SAME: bti c +; AARCH64-SAME: b $1 + +; AARCH64: attributes #[[ATTR]] = { naked nounwind "branch-target-enforcement"="false" "sign-return-address"="none" From 9263931fcccdc99000c1de668bea330711333729 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 25 Aug 2020 17:05:19 +0300 Subject: [PATCH 009/544] [SYCL] Assume SYCL device functions are convergent SYCL device compiler (similar to other SPMD compilers) assumes that functions are convergent by default to avoid invalid transformations. This attribute can be removed if compiler can prove that function does not have convergent operations. Reviewed By: Naghasan Differential Revision: https://reviews.llvm.org/D87282 --- clang/lib/Frontend/CompilerInvocation.cpp | 3 ++- clang/test/CodeGenSYCL/convergent.cpp | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenSYCL/convergent.cpp diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 2d008d8a3fbef..42224339250d6 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2882,7 +2882,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.Coroutines = Opts.CPlusPlus20 || Args.hasArg(OPT_fcoroutines_ts); Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || - Args.hasArg(OPT_fconvergent_functions); + Opts.SYCLIsDevice || + Args.hasArg(OPT_fconvergent_functions); Opts.DoubleSquareBracketAttributes = Args.hasFlag(OPT_fdouble_square_bracket_attributes, diff --git a/clang/test/CodeGenSYCL/convergent.cpp b/clang/test/CodeGenSYCL/convergent.cpp new file mode 100644 index 0000000000000..784fb8976c271 --- /dev/null +++ b/clang/test/CodeGenSYCL/convergent.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -emit-llvm -disable-llvm-passes \ +// RUN: -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | \ +// RUN: FileCheck %s + +// CHECK-DAG: Function Attrs: +// CHECK-DAG-SAME: convergent +// CHECK-DAG-NEXT: define void @_Z3foov +void foo() { + int a = 1; +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { + kernelFunc(); +} + +int main() { + kernel_single_task([] { foo(); }); + return 0; +} From a59be54e611997f23d1e5a76cada448b5f98d47e Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 29 Sep 2020 14:29:58 +0200 Subject: [PATCH 010/544] [sanitizer] Don't build gmock for tests (follow-up to 82827244). A use of gmock was briefly added in a90229d6, but was soon removed in 82827244. This also removes it from the cmake files. --- compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt index 213d4e826b9ee..3c504022ebe7c 100644 --- a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt +++ b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt @@ -53,7 +53,6 @@ endforeach() set(SANITIZER_TEST_CFLAGS_COMMON ${COMPILER_RT_UNITTEST_CFLAGS} ${COMPILER_RT_GTEST_CFLAGS} - ${COMPILER_RT_GMOCK_CFLAGS} -I${COMPILER_RT_SOURCE_DIR}/include -I${COMPILER_RT_SOURCE_DIR}/lib -I${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common @@ -152,7 +151,7 @@ macro(add_sanitizer_tests_for_arch arch) generate_compiler_rt_tests(SANITIZER_TEST_OBJECTS SanitizerUnitTests "Sanitizer-${arch}-Test" ${arch} RUNTIME "${SANITIZER_COMMON_LIB}" - SOURCES ${SANITIZER_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE} ${COMPILER_RT_GMOCK_SOURCE} + SOURCES ${SANITIZER_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE} COMPILE_DEPS ${SANITIZER_TEST_HEADERS} DEPS gtest CFLAGS ${SANITIZER_TEST_CFLAGS_COMMON} ${extra_flags} @@ -208,7 +207,6 @@ if(ANDROID) add_executable(SanitizerTest ${SANITIZER_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE} - ${COMPILER_RT_GMOCK_SOURCE} $ $ $) From 7bae2bc5a8dd11c016c895e3a691fb93575773f3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Sep 2020 13:37:24 +0100 Subject: [PATCH 011/544] [LoopUtils] Only verify SE in builds with assertions. Follow up to 60b852092c98. --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index f2496c4a1ea19..ccb9b6d0bdb4c 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -725,8 +725,10 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LI->destroy(L); } +#ifndef NDEBUG if (SE) SE->verify(); +#endif } /// Checks if \p L has single exit through latch block except possibly From edeff6e642e66a5be05c11cb8b9b36b3383078ae Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 29 Sep 2020 13:20:37 +0200 Subject: [PATCH 012/544] [mlir][GPU] Improve constant sinking in kernel outlining The previous implementation did not support sinking simple expressions. In particular, it is often beneficial to sink dim operations. Differential Revision: https://reviews.llvm.org/D88439 --- .../GPU/Transforms/KernelOutlining.cpp | 114 ++++++++++-------- mlir/test/Dialect/GPU/outlining.mlir | 67 +++++++++- 2 files changed, 124 insertions(+), 57 deletions(-) diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index fcae3114188ae..689161ed1fa2d 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" using namespace mlir; @@ -32,10 +33,10 @@ static void createForAllDimensions(OpBuilder &builder, Location loc, } } -// Add operations generating block/thread ids and grid/block dimensions at the -// beginning of the `launchFuncOpBody` region. Add mapping from argument in -// entry block of `launchOpBody`, to the corresponding result value of the added -// operations. +/// Adds operations generating block/thread ids and grid/block dimensions at the +/// beginning of the `launchFuncOpBody` region. Add mapping from argument in +/// entry block of `launchOpBody`, to the corresponding result value of the +/// added operations. static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody, Region &launchOpBody, BlockAndValueMapping &map) { @@ -53,8 +54,48 @@ static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody, map.map(firstBlock.getArgument(indexOp.index()), indexOp.value()); } +/// Identifies operations that are beneficial to sink into kernels. These +/// operations may not have side-effects, as otherwise sinking (and hence +/// duplicating them) is not legal. static bool isSinkingBeneficiary(Operation *op) { - return isa(op); + return isa(op); +} + +/// For a given operation `op`, computes whether it is beneficial to sink the +/// operation into the kernel. An operation can be sunk if doing so does not +/// introduce new kernel arguments. Whether a value is already available in the +/// kernel (and hence does not introduce new arguments) is checked by +/// querying `availableValues`. +/// If an operand is not yet available, we recursively check whether it can be +/// made available by siking its defining op. +/// Operations that are indentified for sinking are added to `beneficiaryOps` in +/// the order the should appear in the kernel. Furthermore, `availableValues` is +/// updated with results that will be available after sinking the identified +/// ops. +static bool extractBeneficiaryOps(Operation *op, + llvm::SetVector &beneficiaryOps, + llvm::SetVector &availableValues) { + if (beneficiaryOps.count(op)) + return true; + + if (!isSinkingBeneficiary(op)) + return false; + + for (Value operand : op->getOperands()) { + // It is already visisble in the kernel, keep going. + if (availableValues.count(operand)) + continue; + // Else check whether it can be made available via sinking. + Operation *definingOp = operand.getDefiningOp(); + if (!definingOp || + !extractBeneficiaryOps(definingOp, beneficiaryOps, availableValues)) + return false; + } + // We will sink the operation, mark its results as now available. + beneficiaryOps.insert(op); + for (Value result : op->getResults()) + availableValues.insert(result); + return true; } LogicalResult mlir::sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp) { @@ -65,59 +106,30 @@ LogicalResult mlir::sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp) { llvm::SetVector sinkCandidates; getUsedValuesDefinedAbove(launchOpBody, sinkCandidates); - llvm::SetVector sunkValues; - llvm::SetVector sunkOperations; - for (Value operand : sinkCandidates) { + SmallVector worklist(sinkCandidates.begin(), sinkCandidates.end()); + llvm::SetVector toBeSunk; + for (Value operand : worklist) { Operation *operandOp = operand.getDefiningOp(); - if (!operandOp || !isSinkingBeneficiary(operandOp)) + if (!operandOp) continue; - // Only sink operations that do not create new sinkCandidates. - if (!llvm::all_of(operandOp->getOperands(), [&sinkCandidates](Value value) { - return sinkCandidates.count(value); - })) - continue; - sunkValues.insert(operand); - sunkOperations.insert(operandOp); + extractBeneficiaryOps(operandOp, toBeSunk, sinkCandidates); } // Insert operations so that the defs get cloned before uses. BlockAndValueMapping map; OpBuilder builder(launchOpBody); - DenseSet processed; - SmallVector clonedOps; - while (processed.size() != sunkOperations.size()) { - auto startSize = processed.size(); - for (Operation *sunkOperation : sunkOperations) { - if (processed.count(sunkOperation)) - continue; - - // Operation cant be cloned yet if any of its operands is also being sunk, - // but isnt cloned yet. - if (llvm::any_of( - sunkOperation->getOperands(), [&sunkValues, &map](Value value) { - return sunkValues.count(value) && !map.lookupOrNull(value); - })) - continue; - - Operation *clonedOp = builder.clone(*sunkOperation, map); - // Only replace uses within the launch op. - for (auto result : llvm::enumerate(sunkOperation->getResults())) { - auto replacement = clonedOp->getResult(result.index()); - for (auto &use : llvm::make_early_inc_range(result.value().getUses())) - if (use.getOwner()->getParentOfType() == launchOp) - use.set(replacement); - } - processed.insert(sunkOperation); - } - if (startSize == processed.size()) - return launchOp.emitError( - "found illegal cyclic dependency between operations while sinking"); + for (Operation *op : toBeSunk) { + Operation *clonedOp = builder.clone(*op, map); + // Only replace uses within the launch op. + for (auto pair : llvm::zip(op->getResults(), clonedOp->getResults())) + replaceAllUsesInRegionWith(std::get<0>(pair), std::get<1>(pair), + launchOp.body()); } return success(); } -// Outline the `gpu.launch` operation body into a kernel function. Replace -// `gpu.terminator` operations by `gpu.return` in the generated function. +/// Outline the `gpu.launch` operation body into a kernel function. Replace +/// `gpu.terminator` operations by `gpu.return` in the generated function. static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, StringRef kernelFnName, llvm::SetVector &operands) { @@ -191,9 +203,9 @@ gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp, return funcOp; } -// Replace `gpu.launch` operations with an `gpu.launch_func` operation launching -// `kernelFunc`. The kernel func contains the body of the `gpu.launch` with -// constant region arguments inlined. +/// Replace `gpu.launch` operations with an `gpu.launch_func` operation +/// launching `kernelFunc`. The kernel func contains the body of the +/// `gpu.launch` with constant region arguments inlined. static void convertToLaunchFuncOp(gpu::LaunchOp launchOp, gpu::GPUFuncOp kernelFunc, ValueRange operands) { @@ -257,7 +269,7 @@ class GpuKernelOutliningPass } private: - // Returns a gpu.module containing kernelFunc and all callees (recursive). + /// Returns a gpu.module containing kernelFunc and all callees (recursive). gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc, const SymbolTable &parentSymbolTable) { // TODO: This code cannot use an OpBuilder because it must be inserted into diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index 23a8b9d98881f..d43bbc2eb9927 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -60,7 +60,7 @@ func @launch() { // ----- // CHECK: module attributes {gpu.container_module} - +// CHECK-LABEL: @multiple_launches func @multiple_launches() { // CHECK: %[[CST:.*]] = constant 8 : index %cst = constant 8 : index @@ -88,13 +88,66 @@ func @multiple_launches() { // ----- -func @extra_constants(%arg0 : memref) { +// CHECK-LABEL: @extra_constants_not_inlined +func @extra_constants_not_inlined(%arg0: memref) { + // CHECK: %[[CST:.*]] = constant 8 : index + %cst = constant 8 : index + %cst2 = constant 2 : index + %c0 = constant 0 : index + %cst3 = "secret_constant"() : () -> index + // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}, %{{.*}}) {kernel = @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel} : (index, index, index, index, index, index, memref, index) -> () + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, + %grid_z = %cst) + threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, + %block_z = %cst) { + "use"(%cst2, %arg0, %cst3) : (index, memref, index) -> () + gpu.terminator + } + return +} + +// CHECK-LABEL: func @extra_constants_not_inlined_kernel(%{{.*}}: memref, %{{.*}}: index) +// CHECK: constant 2 + +// ----- + +// CHECK-LABEL: @extra_constants +// CHECK-SAME: %[[ARG0:.*]]: memref +func @extra_constants(%arg0: memref) { // CHECK: %[[CST:.*]] = constant 8 : index %cst = constant 8 : index %cst2 = constant 2 : index %c0 = constant 0 : index %cst3 = dim %arg0, %c0 : memref - // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}, %{{.*}}) {kernel = @extra_constants_kernel::@extra_constants_kernel} : (index, index, index, index, index, index, memref, index) -> () + // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]]) {kernel = @extra_constants_kernel::@extra_constants_kernel} : (index, index, index, index, index, index, memref) -> () + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, + %grid_z = %cst) + threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, + %block_z = %cst) { + "use"(%cst2, %arg0, %cst3) : (index, memref, index) -> () + gpu.terminator + } + return +} + +// CHECK-LABEL: func @extra_constants_kernel +// CHECK-SAME: %[[KARG0:.*]]: memref +// CHECK: constant 2 +// CHECK: constant 0 +// CHECK: dim %[[KARG0]] + +// ----- + +// CHECK-LABEL: @extra_constants_noarg +// CHECK-SAME: %[[ARG0:.*]]: memref, %[[ARG1:.*]]: memref +func @extra_constants_noarg(%arg0: memref, %arg1: memref) { + // CHECK: %[[CST:.*]] = constant 8 : index + %cst = constant 8 : index + %cst2 = constant 2 : index + %c0 = constant 0 : index + // CHECK: dim %[[ARG1]] + %cst3 = dim %arg1, %c0 : memref + // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]], %{{.*}}) {kernel = @extra_constants_noarg_kernel::@extra_constants_noarg_kernel} : (index, index, index, index, index, index, memref, index) -> () gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, @@ -105,9 +158,10 @@ func @extra_constants(%arg0 : memref) { return } -// CHECK-LABEL: func @extra_constants_kernel(%{{.*}}: memref, %{{.*}}: index) -// CHECK: constant -// CHECK: constant +// CHECK-LABEL: func @extra_constants_noarg_kernel +// CHECK-SAME: %[[KARG0:.*]]: memref, %[[KARG1:.*]]: index +// CHECK: %[[KCST:.*]] = constant 2 +// CHECK: "use"(%[[KCST]], %[[KARG0]], %[[KARG1]]) // ----- @@ -135,6 +189,7 @@ func @multiple_uses(%arg0 : memref) { llvm.mlir.global internal @global(42 : i64) : !llvm.i64 +//CHECK-LABEL: @function_call func @function_call(%arg0 : memref) { %cst = constant 8 : index gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, From 75a5febe31cb2660c4f72d9745625704d29946e1 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 10 Sep 2020 15:59:36 +0200 Subject: [PATCH 013/544] [SystemZ] Don't emit PC-relative memory accesses to unaligned symbols. In the presence of packed structures (#pragma pack(1)) where elements are referenced through pointers, there will be stores/loads with alignment values matching the default alignments for the element types while the elements are in fact unaligned. Strictly speaking this is incorrect source code, but is unfortunately part of existing code and therefore now addressed. This patch improves the pattern predicate for PC-relative loads and stores by not only checking the alignment value of the instruction, but also making sure that the symbol (and element) itself is aligned. Fixes https://bugs.llvm.org/show_bug.cgi?id=44405 Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D87510 --- .../Target/SystemZ/SystemZISelDAGToDAG.cpp | 44 ++++ llvm/lib/Target/SystemZ/SystemZOperators.td | 12 +- llvm/test/CodeGen/SystemZ/int-move-10.ll | 209 ++++++++++++++++++ 3 files changed, 257 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/int-move-10.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 37328684399b4..9d90a4940cba1 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -338,6 +338,10 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // to X. bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + // Return true if N (a load or a store) fullfills the alignment + // requirements for a PC-relative access. + bool storeLoadIsAligned(SDNode *N) const; + // Try to expand a boolean SELECT_CCMASK using an IPM sequence. SDValue expandSelectBoolean(SDNode *Node); @@ -1460,6 +1464,46 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, canUseBlockOperation(StoreA, LoadB); } +bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const { + + auto *MemAccess = cast(N); + TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize(); + SDValue BasePtr = MemAccess->getBasePtr(); + MachineMemOperand *MMO = MemAccess->getMemOperand(); + assert(MMO && "Expected a memory operand."); + + // The memory access must have a proper alignment and no index register. + if (MemAccess->getAlignment() < StoreSize || + !MemAccess->getOffset().isUndef()) + return false; + + // The MMO must not have an unaligned offset. + if (MMO->getOffset() % StoreSize != 0) + return false; + + // An access to GOT or the Constant Pool is aligned. + if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) + if ((PSV->isGOT() || PSV->isConstantPool())) + return true; + + // Check the alignment of a Global Address. + if (BasePtr.getNumOperands()) + if (GlobalAddressSDNode *GA = + dyn_cast(BasePtr.getOperand(0))) { + // The immediate offset must be aligned. + if (GA->getOffset() % StoreSize != 0) + return false; + + // The alignment of the symbol itself must be at least the store size. + const GlobalValue *GV = GA->getGlobal(); + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (GV->getPointerAlignment(DL).value() < StoreSize) + return false; + } + + return true; +} + void SystemZDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 81af5fd854db1..a5f29f29a706e 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -572,10 +572,8 @@ def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ // Aligned loads. class AlignedLoad - : PatFrag<(ops node:$addr), (load node:$addr), [{ - auto *Load = cast(N); - return Load->getAlignment() >= Load->getMemoryVT().getStoreSize(); -}]>; + : PatFrag<(ops node:$addr), (load node:$addr), + [{ return storeLoadIsAligned(N); }]>; def aligned_load : AlignedLoad; def aligned_asextloadi16 : AlignedLoad; def aligned_asextloadi32 : AlignedLoad; @@ -584,10 +582,8 @@ def aligned_azextloadi32 : AlignedLoad; // Aligned stores. class AlignedStore - : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ - auto *Store = cast(N); - return Store->getAlignment() >= Store->getMemoryVT().getStoreSize(); -}]>; + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), + [{ return storeLoadIsAligned(N); }]>; def aligned_store : AlignedStore; def aligned_truncstorei16 : AlignedStore; def aligned_truncstorei32 : AlignedStore; diff --git a/llvm/test/CodeGen/SystemZ/int-move-10.ll b/llvm/test/CodeGen/SystemZ/int-move-10.ll new file mode 100644 index 0000000000000..8b8b9ed1a94ae --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-move-10.ll @@ -0,0 +1,209 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Test PC-relative memory accesses of globals with packed struct types. +; PC-relative memory accesses cannot be used when the address is not +; aligned. This can happen with programs like the following (which are not +; strictly correct): +; +; #pragma pack(1) +; struct { +; short a; +; int b; +; } c; +; +; void main() { +; int *e = &c.b; +; *e = 0; +; } +; + +%packed.i16i32 = type <{ i16, i32 }> +%packed.i16i32i16i32 = type <{ i16, i32, i16, i32 }> +%packed.i16i64 = type <{ i16, i64 }> +%packed.i8i16 = type <{ i8, i16 }> + +@A_align2 = global %packed.i16i32 zeroinitializer, align 2 +@B_align2 = global %packed.i16i32i16i32 zeroinitializer, align 2 +@C_align2 = global %packed.i16i64 zeroinitializer, align 2 +@D_align4 = global %packed.i16i32 zeroinitializer, align 4 +@E_align4 = global %packed.i16i32i16i32 zeroinitializer, align 4 +@F_align2 = global %packed.i8i16 zeroinitializer, align 2 + +;;; Stores + +; unaligned packed struct + 2 -> unaligned address +define void @f1() { +; CHECK-LABEL: f1: +; CHECK: larl %r1, A_align2 +; CHECK: mvhi 2(%r1), 0 +; CHECK: br %r14 + store i32 0, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @A_align2, i64 0, i32 1), align 4 + ret void +} + +; unaligned packed struct + 8 -> unaligned address +define void @f2() { +; CHECK-LABEL: f2: +; CHECK: larl %r1, B_align2 +; CHECK: mvhi 8(%r1), 0 +; CHECK: br %r14 + store i32 0, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @B_align2, i64 0, i32 3), align 4 + ret void +} + +; aligned packed struct + 2 -> unaligned address +define void @f3() { +; CHECK-LABEL: f3: +; CHECK: larl %r1, D_align4 +; CHECK: mvhi 2(%r1), 0 +; CHECK: br %r14 + store i32 0, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @D_align4, i64 0, i32 1), align 4 + ret void +} + +; aligned packed struct + 8 -> aligned address +define void @f4() { +; CHECK-LABEL: f4: +; CHECK: lhi %r0, 0 +; CHECK: strl %r0, E_align4+8 +; CHECK: br %r14 + store i32 0, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @E_align4, i64 0, i32 3), align 4 + ret void +} + +define void @f5() { +; CHECK-LABEL: f5: +; CHECK: larl %r1, C_align2 +; CHECK: mvghi 2(%r1), 0 +; CHECK: br %r14 + store i64 0, i64* getelementptr inbounds (%packed.i16i64, %packed.i16i64* @C_align2, i64 0, i32 1), align 8 + ret void +} + +define void @f6() { +; CHECK-LABEL: f6: +; CHECK-NOT: sthrl + store i16 0, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2 + ret void +} + +define void @f7(i64* %Src) { +; CHECK-LABEL: f7: +; CHECK: lg %r0, 0(%r2) +; CHECK: larl %r1, D_align4 +; CHECK: st %r0, 2(%r1) +; CHECK: br %r14 + %L = load i64, i64* %Src + %T = trunc i64 %L to i32 + store i32 %T, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @D_align4, i64 0, i32 1), align 4 + ret void +} + +define void @f8(i64* %Src) { +; CHECK-LABEL: f8: +; CHECK-NOT: sthrl + %L = load i64, i64* %Src + %T = trunc i64 %L to i16 + store i16 %T, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2 + ret void +} + +;;; Loads + +; unaligned packed struct + 2 -> unaligned address +define i32 @f9() { +; CHECK-LABEL: f9: +; CHECK: larl %r1, A_align2 +; CHECK: l %r2, 2(%r1) +; CHECK: br %r14 + %L = load i32, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @A_align2, i64 0, i32 1), align 4 + ret i32 %L +} + +; unaligned packed struct + 8 -> unaligned address +define i32 @f10() { +; CHECK-LABEL: f10: +; CHECK: larl %r1, B_align2 +; CHECK: l %r2, 8(%r1) +; CHECK: br %r14 + %L = load i32, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @B_align2, i64 0, i32 3), align 4 + ret i32 %L +} + +; aligned packed struct + 2 -> unaligned address +define i32 @f11() { +; CHECK-LABEL: f11: +; CHECK: larl %r1, D_align4 +; CHECK: l %r2, 2(%r1) +; CHECK: br %r14 + %L = load i32, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @D_align4, i64 0, i32 1), align 4 + ret i32 %L +} + +; aligned packed struct + 8 -> aligned address +define i32 @f12() { +; CHECK-LABEL: f12: +; CHECK: lrl %r2, E_align4+8 +; CHECK: br %r14 + %L = load i32, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @E_align4, i64 0, i32 3), align 4 + ret i32 %L +} + +define i64 @f13() { +; CHECK-LABEL: f13: +; CHECK: larl %r1, C_align2 +; CHECK: lg %r2, 2(%r1) +; CHECK: br %r14 + %L = load i64, i64* getelementptr inbounds (%packed.i16i64, %packed.i16i64* @C_align2, i64 0, i32 1), align 8 + ret i64 %L +} + +define i32 @f14() { +; CHECK-LABEL: f14: +; CHECK-NOT: lhrl + %L = load i16, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2 + %ext = sext i16 %L to i32 + ret i32 %ext +} + +define i64 @f15() { +; CHECK-LABEL: f15: +; CHECK-NOT: llghrl + %L = load i16, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2 + %ext = zext i16 %L to i64 + ret i64 %ext +} + +;;; Loads folded into compare instructions + +define i32 @f16(i32 %src1) { +; CHECK-LABEL: f16: +; CHECK: larl %r1, A_align2 +; CHECK: c %r2, 2(%r1) +entry: + %src2 = load i32, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @A_align2, i64 0, i32 1), align 4 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +define i64 @f17(i64 %src1) { +; CHECK-LABEL: f17: +; CHECK: larl %r1, C_align2 +; CHECK: clg %r2, 2(%r1) +entry: + %src2 = load i64, i64* getelementptr inbounds (%packed.i16i64, %packed.i16i64* @C_align2, i64 0, i32 1), align 8 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} From d6b04f3937e374572039005d1446b4a950dc8f01 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 28 Sep 2020 12:44:43 +0100 Subject: [PATCH 014/544] [SDag] Refactor and simplify divergence calculation and checking. NFC. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 3 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 64 +++++++++---------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 6e733f8c9b9c5..f86d46da23ce0 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1424,6 +1424,9 @@ class SelectionDAG { void setNodeMemRefs(MachineSDNode *N, ArrayRef NewMemRefs); + // Calculate divergence of node \p N based on its operands. + bool calculateDivergence(SDNode *N); + // Propagates the change in divergence to users void updateDivergence(SDNode * N); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3e3d798711626..cfb4aa2f0bb53 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8718,21 +8718,31 @@ namespace { } // end anonymous namespace -void SelectionDAG::updateDivergence(SDNode * N) -{ - if (TLI->isSDNodeAlwaysUniform(N)) - return; - bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); +bool SelectionDAG::calculateDivergence(SDNode *N) { + if (TLI->isSDNodeAlwaysUniform(N)) { + assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, DA) && + "Conflicting divergence information!"); + return false; + } + if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA)) + return true; for (auto &Op : N->ops()) { - if (Op.Val.getValueType() != MVT::Other) - IsDivergent |= Op.getNode()->isDivergent(); + if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent()) + return true; } - if (N->SDNodeBits.IsDivergent != IsDivergent) { - N->SDNodeBits.IsDivergent = IsDivergent; - for (auto U : N->uses()) { - updateDivergence(U); + return false; +} + +void SelectionDAG::updateDivergence(SDNode *N) { + SmallVector Worklist(1, N); + do { + N = Worklist.pop_back_val(); + bool IsDivergent = calculateDivergence(N); + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + Worklist.insert(Worklist.end(), N->use_begin(), N->use_end()); } - } + } while (!Worklist.empty()); } void SelectionDAG::CreateTopologicalOrder(std::vector &Order) { @@ -8758,26 +8768,9 @@ void SelectionDAG::CreateTopologicalOrder(std::vector &Order) { void SelectionDAG::VerifyDAGDiverence() { std::vector TopoOrder; CreateTopologicalOrder(TopoOrder); - const TargetLowering &TLI = getTargetLoweringInfo(); - DenseMap DivergenceMap; - for (auto &N : allnodes()) { - DivergenceMap[&N] = false; - } - for (auto N : TopoOrder) { - bool IsDivergent = DivergenceMap[N]; - bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); - for (auto &Op : N->ops()) { - if (Op.Val.getValueType() != MVT::Other) - IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; - } - if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { - DivergenceMap[N] = true; - } - } - for (auto &N : allnodes()) { - (void)N; - assert(DivergenceMap[&N] == N.isDivergent() && - "Divergence bit inconsistency detected\n"); + for (auto *N : TopoOrder) { + assert(calculateDivergence(N) == N->isDivergent() && + "Divergence bit inconsistency detected"); } } #endif @@ -9963,13 +9956,14 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef Vals) { Ops[I].setUser(Node); Ops[I].setInitial(Vals[I]); if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. - IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + IsDivergent |= Ops[I].getNode()->isDivergent(); } Node->NumOperands = Vals.size(); Node->OperandList = Ops; - IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); - if (!TLI->isSDNodeAlwaysUniform(Node)) + if (!TLI->isSDNodeAlwaysUniform(Node)) { + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); Node->SDNodeBits.IsDivergent = IsDivergent; + } checkForCycles(Node); } From 781edd501c25ce1b526764e2b048e9e1c5a41728 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 28 Sep 2020 13:37:49 +0100 Subject: [PATCH 015/544] [SDag] Verify DAG divergence after dumping. NFC. When debugging, it's useful to be able to see the DAG that has just failed divergence verification. --- .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ffabe7a5b0411..3f3eb354bee31 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -779,6 +779,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -789,16 +794,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + // Second step, hack on the DAG until it only uses operations and types that // the target supports. if (ViewLegalizeTypesDAGs && MatchFilterBB) @@ -811,16 +816,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + // Only allow creation of legal node types. CurDAG->NewNodesMustHaveLegalTypes = true; @@ -835,15 +840,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); + +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif } { @@ -858,6 +863,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, GroupDescription, TimePassesIsEnabled); @@ -869,6 +879,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -899,16 +914,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -919,16 +934,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } -#ifndef NDEBUG - if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); -#endif - LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; CurDAG->dump()); +#ifndef NDEBUG + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); +#endif + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); From 6199219bbd8224b7cf69b4a538bd6bc49f6daaf0 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 29 Sep 2020 14:20:35 +0100 Subject: [PATCH 016/544] [mlir] Fix shared libs build The following change causes the shared libraries build (BUILD_SHARED_LIBS=On) to fail: * https://reviews.llvm.org/D88351 This patch will fix that. Differential Revision: https://reviews.llvm.org/D88484 --- mlir/lib/Target/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt index ce67586e60dc9..cdc9e2db9cd16 100644 --- a/mlir/lib/Target/CMakeLists.txt +++ b/mlir/lib/Target/CMakeLists.txt @@ -16,6 +16,7 @@ add_mlir_translation_library(MLIRTargetLLVMIRModuleTranslation LINK_LIBS PUBLIC MLIRLLVMIR + MLIROpenMP MLIRLLVMIRTransforms MLIRTranslation ) @@ -51,7 +52,6 @@ add_mlir_translation_library(MLIRTargetLLVMIR IRReader LINK_LIBS PUBLIC - MLIROpenMP MLIRTargetLLVMIRModuleTranslation ) From 8b08fa0103c8d8e624b19fad5a5006e7a783ecb7 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 29 Sep 2020 15:29:26 +0200 Subject: [PATCH 017/544] Revert "[AMDGPU] Reorganize GCN subtarget features for unaligned access" This reverts commit f5cd7ec9f3fc969ff5e1feed961996844333de3b. Certain rocPRIM/rocThrust/hipCUB tests were failing because of this change. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 32 ++++++++----------- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 ++-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 4 +-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 18 +++-------- .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +-- .../AMDGPU/GlobalISel/load-constant.96.ll | 8 ++--- .../CodeGen/AMDGPU/amdgpu.private-memory.ll | 10 +++--- llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll | 2 +- .../fast-unaligned-load-store.global.ll | 6 ++-- .../CodeGen/AMDGPU/unaligned-load-store.ll | 2 +- .../llc-target-cpu-attr-from-cmdline-ir.mir | 6 ++-- .../llc-target-cpu-attr-from-cmdline.mir | 4 +-- .../AMDGPU/adjust-alloca-alignment.ll | 8 ++--- .../AMDGPU/merge-stores.ll | 2 +- .../AMDGPU/multiple_tails.ll | 25 ++++++--------- 16 files changed, 61 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 3e8cd60b7d77a..6624ff00ecf64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true", - "Hardware supports unaligned global loads and stores" + "Support unaligned global loads and stores" >; def FeatureTrapHandler: SubtargetFeature<"trap-handler", @@ -105,10 +105,18 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access", "Support unaligned scratch loads and stores" >; +// LDS alignment enforcement is controlled by a configuration register: +// SH_MEM_CONFIG.alignment_mode +def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", + "UnalignedAccessMode", + "true", + "Support unaligned local and region loads and stores" +>; + def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access", "UnalignedDSAccess", "true", - "Hardware supports unaligned local and region loads and stores" + "Does not requires 16 byte alignment for certain local and region loads and stores" >; def FeatureApertureRegs : SubtargetFeature<"aperture-regs", @@ -645,15 +653,6 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range", "Requires use of fract on arguments to trig instructions" >; -// Alignment enforcement is controlled by a configuration register: -// SH_MEM_CONFIG.alignment_mode -def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", - "UnalignedAccessMode", - "true", - "Enable unaligned global, local and region loads and stores if the hardware" - " supports it" ->; - // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -680,8 +679,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, - FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, - FeatureUnalignedBufferAccess] + FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC] >; def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", @@ -694,8 +692,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, - FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32, - FeatureUnalignedBufferAccess + FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32 ] >; @@ -712,8 +709,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, - FeatureFastDenormalF32, FeatureUnalignedBufferAccess, - FeatureUnalignedDSAccess + FeatureFastDenormalF32, FeatureUnalignedDSAccess ] >; @@ -732,7 +728,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC, FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess + FeatureUnalignedDSAccess ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1a9797eb401dc..0f1eb03f0c27d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1068,9 +1068,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; }; - unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32; - unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16; - unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8; + unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32; + unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16; + unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8; // TODO: Refine based on subtargets which support unaligned access or 128-bit // LDS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index c03d24016cac2..071354673ba21 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -81,7 +81,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,"; + FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS @@ -186,6 +186,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, AutoWaitcntBeforeBarrier(false), CodeObjectV3(false), UnalignedScratchAccess(false), + UnalignedBufferAccess(false), UnalignedAccessMode(false), HasApertureRegs(false), @@ -257,7 +258,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasUnpackedD16VMem(false), LDSMisalignedBug(false), HasMFMAInlineLiteralBug(false), - UnalignedBufferAccess(false), UnalignedDSAccess(false), ScalarizeGlobal(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index ce669bb250cae..52d1f18513e18 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -318,6 +318,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, bool AutoWaitcntBeforeBarrier; bool CodeObjectV3; bool UnalignedScratchAccess; + bool UnalignedBufferAccess; bool UnalignedAccessMode; bool HasApertureRegs; bool EnableXNACK; @@ -398,7 +399,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, bool HasMFMAInlineLiteralBug; bool HasVertexCache; short TexVTXClauseSize; - bool UnalignedBufferAccess; bool UnalignedDSAccess; bool ScalarizeGlobal; @@ -706,18 +706,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, return UnalignedBufferAccess; } - bool hasUnalignedBufferAccessEnabled() const { - return UnalignedBufferAccess && UnalignedAccessMode; - } - - bool hasUnalignedDSAccess() const { - return UnalignedDSAccess; - } - - bool hasUnalignedDSAccessEnabled() const { - return UnalignedDSAccess && UnalignedAccessMode; - } - bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; } @@ -726,6 +714,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, return UnalignedAccessMode; } + bool hasUnalignedDSAccess() const { + return UnalignedDSAccess; + } + bool hasApertureRegs() const { return HasApertureRegs; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 7929d5bbbedcc..aa3cc75d5da07 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -88,6 +88,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal, AMDGPU::FeaturePromoteAlloca, + AMDGPU::FeatureUnalignedBufferAccess, AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b7b8845446195..73408346fbae9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1433,7 +1433,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( AddrSpace == AMDGPUAS::REGION_ADDRESS) { // Check if alignment requirements for ds_read/write instructions are // disabled. - if (Subtarget->hasUnalignedDSAccessEnabled() && + if (Subtarget->hasUnalignedDSAccess() && + Subtarget->hasUnalignedAccessMode() && !Subtarget->hasLDSMisalignedBug()) { if (IsFast) *IsFast = Alignment != Align(2); @@ -1483,7 +1484,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( return AlignedBy4; } - if (Subtarget->hasUnalignedBufferAccessEnabled() && + if (Subtarget->hasUnalignedBufferAccess() && !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS)) { // If we have an uniform constant load, it still requires using a slow diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index 7ff3fffdfbc76..6dceaf2e22fa8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index 388123b6210a2..072a76780447e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -1,10 +1,10 @@ ; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s ; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index d03ca166d992b..da86b8104b8ec 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s define <2 x half> @chain_hi_to_lo_private() { ; GCN-LABEL: chain_hi_to_lo_private: diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll index 3b6396f8b63fc..a5c98a1b49208 100644 --- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll +++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; Should not merge this to a dword load define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll index 5d5cfd318edfb..ea60d0d00432f 100644 --- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll +++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s ; SI-LABEL: {{^}}local_unaligned_load_store_i16: diff --git a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir index 4272ead86f2d0..ccbc4ed877955 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir @@ -1,5 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s -# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s +# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s # FIXME: This overrides attributes that already are present. It should probably # only touch functions without an existing attribute. @@ -10,8 +10,8 @@ # MCPU: attributes #0 = { "target-cpu"="fiji" } # MCPU: attributes #1 = { "target-cpu"="hawaii" } -# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" } -# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" } +# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" } +# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" } --- | define amdgpu_kernel void @with_cpu_attr() #0 { diff --git a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir index fa94e3c76a7a8..bd16888bf07ea 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir @@ -1,10 +1,10 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s -# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s +# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s # The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function. # MCPU: attributes #0 = { "target-cpu"="hawaii" } -# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" } +# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" } --- name: no_ir diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll index a46f4d4175b7c..9f85fec33ba14 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s -; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s -; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s -; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s +; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s +; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s +; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s +; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s target triple = "amdgcn--" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll index c1889cd0a9b9b..60cac116b87b3 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=hawaii -load-store-vectorizer -S -o - %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s ; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll index 4105b4013ec35..eb3b177ba6d65 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX7 %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" @@ -31,20 +31,13 @@ define amdgpu_kernel void @no_crash(i32 %arg) { ; GCN-LABEL: @interleave_get_longest -; GFX7: load <2 x i32> -; GFX7: load i32 -; GFX7: store <2 x i32> zeroinitializer -; GFX7: load i32 -; GFX7: load <2 x i32> -; GFX7: load i32 -; GFX7: load i32 - -; GFX9: load <4 x i32> -; GFX9: load i32 -; GFX9: store <2 x i32> zeroinitializer -; GFX9: load i32 -; GFX9: load i32 -; GFX9: load i32 +; GCN: load <2 x i32> +; GCN: load i32 +; GCN: store <2 x i32> zeroinitializer +; GCN: load i32 +; GCN: load <2 x i32> +; GCN: load i32 +; GCN: load i32 define amdgpu_kernel void @interleave_get_longest(i32 %arg) { %a1 = add i32 %arg, 1 From f34ae1b9de68152de037fd3e394d196b997c4296 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 29 Sep 2020 15:50:19 +0200 Subject: [PATCH 018/544] [AArch64] Add v8.5 Branch Target Identification support. The .note.gnu.property must be in the assembly file to indicate the support for BTI otherwise BTI will be disabled for the whole library. __unw_getcontext and libunwind::Registers_arm64::jumpto() may be called indirectly therefore they should start with a landing pad. Reviewed By: tamas.petz, #libunwind, compnerd Differential Revision: https://reviews.llvm.org/D77786 --- libunwind/src/assembly.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index 4cf179e13edc2..3b1e6e6d01d7a 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -48,6 +48,24 @@ #define PPC64_OPD2 #endif +#if defined(__ARM_FEATURE_BTI_DEFAULT) + .pushsection ".note.gnu.property", "a" SEPARATOR \ + .balign 8 SEPARATOR \ + .long 4 SEPARATOR \ + .long 0x10 SEPARATOR \ + .long 0x5 SEPARATOR \ + .asciz "GNU" SEPARATOR \ + .long 0xc0000000 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */ \ + .long 4 SEPARATOR \ + .long 3 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_BTI AND */ \ + /* GNU_PROPERTY_AARCH64_FEATURE_1_PAC */ \ + .long 0 SEPARATOR \ + .popsection SEPARATOR +#define AARCH64_BTI bti c +#else +#define AARCH64_BTI +#endif + #define GLUE2(a, b) a ## b #define GLUE(a, b) GLUE2(a, b) #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) @@ -144,7 +162,8 @@ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ PPC64_OPD1 \ SYMBOL_NAME(name): \ - PPC64_OPD2 + PPC64_OPD2 \ + AARCH64_BTI #if defined(__arm__) #if !defined(__ARM_ARCH) From 074ab233ed620c1afa44e5bc2d86ab448a9ce1ed Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 29 Sep 2020 08:07:08 -0400 Subject: [PATCH 019/544] [mlir][Linalg] Refactor Linalg creation of loops to allow passing iterArgs - NFC This revision changes the signatures of helper function that Linalg uses to create loops so that they can also take iterArgs. iterArgs are asserted empty to ensure no functional change. This is a mechanical change in preparation of tiling on linalg on tensors to avoid polluting the implementation with an NFC change. Differential Revision: https://reviews.llvm.org/D88480 --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 9 +++--- mlir/include/mlir/Dialect/SCF/EDSC/Builders.h | 4 +++ mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 5 ++- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 5 +-- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 31 +++++++++++++------ mlir/lib/Dialect/SCF/EDSC/Builders.cpp | 19 ++++++++++++ 6 files changed, 57 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 35353adf11ed4..aca5a981b0034 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -214,10 +214,11 @@ struct GenerateLoopNest { typename std::conditional::value, AffineIndexedValue, StdIndexedValue>::type; - static void doit(ArrayRef loopRanges, - ArrayRef iteratorTypes, - function_ref bodyBuilderFn, - Optional = None); + static void + doit(ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef iteratorTypes, + function_ref bodyBuilderFn, + Optional = None); }; } // namespace linalg diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h index 50adec2f9b8bd..fe8df4c2d0e44 100644 --- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h @@ -32,6 +32,10 @@ scf::ValueVector loopNestBuilder(Value lb, Value ub, Value step, scf::ValueVector loopNestBuilder( Value lb, Value ub, Value step, ValueRange iterArgInitValues, function_ref fun = nullptr); +scf::ValueVector loopNestBuilder( + ValueRange lbs, ValueRange ubs, ValueRange steps, + ValueRange iterArgInitValues, + function_ref fun = nullptr); /// Adapters for building if conditions using the builder and the location /// stored in ScopedContext. 'thenBody' is mandatory, 'elseBody' can be omitted diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index d3c90ffab06fd..eb452cc40305b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -515,9 +515,12 @@ Optional linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) { map, getViewSizes(builder, linalgOp)); SmallVector allIvs; GenerateLoopNest::doit( - loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) { + loopRanges, /*iterInitArgs*/ {}, linalgOp.iterator_types().getValue(), + [&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector { + assert(iterArgs.empty() && "unexpected iterArgs"); allIvs.append(ivs.begin(), ivs.end()); emitScalarImplementation(allIvs, linalgOp); + return scf::ValueVector{}; }); // Number of loop ops might be different from the number of ivs since some // loops like affine.parallel and scf.parallel have multiple ivs. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index daaad2e6fa4be..676caa145c3a2 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -386,8 +386,8 @@ Optional static tileLinalgOpImpl( if (!options.interchangeVector.empty()) applyPermutationToVector(iteratorTypes, options.interchangeVector); GenerateLoopNest::doit( - loopRanges, iteratorTypes, - [&](ValueRange localIvs) { + loopRanges, /*iterArgInitValues*/ {}, iteratorTypes, + [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector { auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); ivs.assign(localIvs.begin(), localIvs.end()); @@ -406,6 +406,7 @@ Optional static tileLinalgOpImpl( auto operands = getAssumedNonViewOperands(op); views.append(operands.begin(), operands.end()); res = op.clone(b, loc, views); + return scf::ValueVector{}; }, options.distribution); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 585b00189964d..204716b407466 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -194,20 +194,23 @@ getLoopRanges(OpBuilder &builder, LinalgOp linalgOp, OperationFolder *folder) { /// Specialization to build an scf "for" nest. template <> void GenerateLoopNest::doit( - ArrayRef loopRanges, ArrayRef iteratorTypes, - function_ref bodyBuilderFn, + ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef iteratorTypes, + function_ref bodyBuilderFn, Optional) { SmallVector lbs, ubs, steps; unpackRanges(loopRanges, lbs, ubs, steps); - edsc::loopNestBuilder(lbs, ubs, steps, bodyBuilderFn); + edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn); } /// Specialization to build affine "for" nest. template <> void GenerateLoopNest::doit( - ArrayRef loopRanges, ArrayRef iteratorTypes, - function_ref bodyBuilderFn, + ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef iteratorTypes, + function_ref bodyBuilderFn, Optional) { + assert(iterArgInitValues.empty() && "unexpected AffineForOp init values"); SmallVector lbs, ubs, steps; unpackRanges(loopRanges, lbs, ubs, steps); @@ -220,7 +223,11 @@ void GenerateLoopNest::doit( constantSteps.push_back(op.getValue()); } - edsc::affineLoopNestBuilder(lbs, ubs, constantSteps, bodyBuilderFn); + auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) { + bodyBuilderFn(ivs, {}); + }; + edsc::affineLoopNestBuilder(lbs, ubs, constantSteps, + bodyBuilderWithoutIterArgsFn); } /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`. @@ -357,9 +364,11 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps, /// Specialization for generating a mix of parallel and sequential scf loops. template <> void GenerateLoopNest::doit( - ArrayRef loopRanges, ArrayRef iteratorTypes, - function_ref bodyBuilderFn, + ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef iteratorTypes, + function_ref bodyBuilderFn, Optional distributionOptions) { + assert(iterArgInitValues.empty() && "unexpected ParallelOp init values"); // This function may be passed more iterator types than ranges. assert(iteratorTypes.size() >= loopRanges.size() && "expected iterator type for all ranges"); @@ -405,7 +414,11 @@ void GenerateLoopNest::doit( } } ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage); - generateParallelLoopNest(lbs, ubs, steps, iteratorTypes, bodyBuilderFn, ivs, + auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) { + bodyBuilderFn(ivs, {}); + }; + generateParallelLoopNest(lbs, ubs, steps, iteratorTypes, + bodyBuilderWithoutIterArgsFn, ivs, distributionMethod); assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops"); diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp index 2098ca1bf7d00..45097186a248a 100644 --- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp @@ -61,6 +61,25 @@ mlir::scf::ValueVector mlir::edsc::loopNestBuilder( }); } +mlir::scf::ValueVector mlir::edsc::loopNestBuilder( + ValueRange lbs, ValueRange ubs, ValueRange steps, + ValueRange iterArgInitValues, + function_ref fun) { + // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into + // the expected function interface. + assert(ScopedContext::getContext() && "EDSC ScopedContext not set up"); + return mlir::scf::buildLoopNest( + ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs, + steps, iterArgInitValues, + [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) { + ScopedContext context(builder, loc); + if (fun) + return fun(ivs, args); + return scf::ValueVector(iterArgInitValues.begin(), + iterArgInitValues.end()); + }); +} + static std::function wrapIfBody(function_ref body, TypeRange expectedTypes) { (void)expectedTypes; From 113114a5da60ef30731046f50fc1d67ff87897fc Mon Sep 17 00:00:00 2001 From: Dominik Montada Date: Mon, 28 Sep 2020 16:38:35 +0200 Subject: [PATCH 020/544] [GlobalISel] fix widenScalarUnmerge if widen type is not a multiple of destination type Fix creation of illegal unmerge when widen was requested to a type which is not a multiple of the destination type. E.g. when trying to widen an s48 unmerge to s64 the existing code would create an illegal unmerge from s64 to s48. Instead, create further unmerges to a GCD type, then use this to remerge these intermediate results to the actual destinations. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D88422 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 71 +++++++++++++------ .../GlobalISel/LegalizerHelperTest.cpp | 44 ++++++++++++ 2 files changed, 92 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f8ca0e85ee823..e8bc4067c127e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1605,35 +1605,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc); - // Create a sequence of unmerges to the original results. since we may have - // widened the source, we will need to pad the results with dead defs to cover - // the source register. - // e.g. widen s16 to s32: - // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48) + // Create a sequence of unmerges and merges to the original results. Since we + // may have widened the source, we will need to pad the results with dead defs + // to cover the source register. + // e.g. widen s48 to s64: + // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96) // // => - // %4:_(s64) = G_ANYEXT %0:_(s48) - // %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge - // %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs - // %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def - + // %4:_(s192) = G_ANYEXT %0:_(s96) + // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge + // ; unpack to GCD type, with extra dead defs + // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64) + // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64) + // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64) + // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination + // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination + const LLT GCDTy = getGCDType(WideTy, DstTy); const int NumUnmerge = Unmerge->getNumOperands() - 1; - const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); - - for (int I = 0; I != NumUnmerge; ++I) { - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - - for (int J = 0; J != PartsPerUnmerge; ++J) { - int Idx = I * PartsPerUnmerge + J; - if (Idx < NumDst) - MIB.addDef(MI.getOperand(Idx).getReg()); - else { - // Create dead def for excess components. - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits(); + + // Directly unmerge to the destination without going through a GCD type + // if possible + if (PartsPerRemerge == 1) { + const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits(); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) { + int Idx = I * PartsPerUnmerge + J; + if (Idx < NumDst) + MIB.addDef(MI.getOperand(Idx).getReg()); + else { + // Create dead def for excess components. + MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + } } + + MIB.addUse(Unmerge.getReg(I)); } + } else { + SmallVector Parts; + for (int J = 0; J != NumUnmerge; ++J) + extractGCDType(Parts, GCDTy, Unmerge.getReg(J)); + + SmallVector RemergeParts; + for (int I = 0; I != NumDst; ++I) { + for (int J = 0; J < PartsPerRemerge; ++J) { + const int Idx = I * PartsPerRemerge + J; + RemergeParts.emplace_back(Parts[Idx]); + } - MIB.addUse(Unmerge.getReg(I)); + MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts); + RemergeParts.clear(); + } } MI.eraseFromParent(); diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 66f5804479fa5..feb0c2366a95c 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -3135,4 +3135,48 @@ TEST_F(AArch64GISelMITest, FewerElementsInsertVectorElt) { EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } +// Test widen scalar of G_UNMERGE_VALUES +TEST_F(AArch64GISelMITest, widenScalarUnmerge) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + LLT S96{LLT::scalar(96)}; + LLT S64{LLT::scalar(64)}; + LLT S48{LLT::scalar(48)}; + + auto Src = B.buildAnyExt(S96, Copies[0]); + auto Unmerge = B.buildUnmerge(S48, Src); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + B.setInsertPt(*EntryMBB, Unmerge->getIterator()); + + // This should create unmerges to a GCD type (S16), then remerge to S48 + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.widenScalar(*Unmerge, 0, S64)); + + const auto *CheckStr = R"( + CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY + CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[COPY0]] + CHECK: [[ANYEXT1:%[0-9]+]]:_(s192) = G_ANYEXT [[ANYEXT]] + CHECK: [[UNMERGE:%[0-9]+]]:_(s64), [[UNMERGE1:%[0-9]+]]:_(s64), [[UNMERGE2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT1]] + CHECK: [[UNMERGE3:%[0-9]+]]:_(s16), [[UNMERGE4:%[0-9]+]]:_(s16), [[UNMERGE5:%[0-9]+]]:_(s16), [[UNMERGE6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UNMERGE]] + CHECK: [[UNMERGE7:%[0-9]+]]:_(s16), [[UNMERGE8:%[0-9]+]]:_(s16), [[UNMERGE9:%[0-9]+]]:_(s16), [[UNMERGE10:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UNMERGE1]] + CHECK: [[UNMERGE11:%[0-9]+]]:_(s16), [[UNMERGE12:%[0-9]+]]:_(s16), [[UNMERGE13:%[0-9]+]]:_(s16), [[UNMERGE14:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UNMERGE2]] + CHECK: [[MERGE:%[0-9]+]]:_(s48) = G_MERGE_VALUES [[UNMERGE3]]:_(s16), [[UNMERGE4]]:_(s16), [[UNMERGE5]]:_(s16) + CHECK: [[MERGE1:%[0-9]+]]:_(s48) = G_MERGE_VALUES [[UNMERGE6]]:_(s16), [[UNMERGE7]]:_(s16), [[UNMERGE8]]:_(s16) + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + } // namespace From 6b649570cbc44dd775d9657805cc60b2075d8011 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 29 Sep 2020 08:23:37 -0400 Subject: [PATCH 021/544] [mlir][Linalg] Refactor Linalg op initTensors support - NFC Manually-defined named ops do not currently support `init_tensors` or return values and may never support them. Add extra interface to the StructuredOpInterface so that we can still write op-agnostic transformations based on StructuredOpInterface. This is an NFC extension in preparation for tiling on tensors. Differential Revision: https://reviews.llvm.org/D88481 --- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 33 +++++++-- .../Linalg/IR/LinalgStructuredOpsInterface.td | 69 ++++++++++++++++--- .../mlir/Dialect/Linalg/IR/LinalgTraits.h | 14 ++++ mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 2 +- 5 files changed, 101 insertions(+), 19 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index ed87689822e5f..d123229337370 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -22,14 +22,19 @@ include "mlir/Interfaces/CopyOpInterface.td" // The Linalg `NInputs` trait provides the API for ops that are known // to have a specified number of inputs, all passed as operands. // See Linalg/LinalgTraits.h for implementation details and usage. -class NInputs : - NativeOpTrait<"linalg::NInputs<" # !cast(args_in) # ">::Impl"> {} +class NInputs : + NativeOpTrait<"linalg::NInputs<" # !cast(n) # ">::Impl"> {} + +// The Linalg `ZeroInitTensors` trait provides the API for ops that are known +// to not have input tensor operands. +// See Linalg/LinalgTraits.h for implementation details and usage. +def ZeroInitTensors : NativeOpTrait<"linalg::ZeroInitTensors"> {} // The Linalg `NOutputs` trait provides the API for ops that are known // to have a specified number of outputs, all passed as operands. // See Linalg/LinalgTraits.h for implementation details and usage. -class NOutputs : - NativeOpTrait<"linalg::NOutputs<" # !cast(args_out) # ">::Impl"> {} +class NOutputs : + NativeOpTrait<"linalg::NOutputs<" # !cast(n) # ">::Impl"> {} def StructuredOpTraits : NativeOpTrait<"linalg::StructuredOpTraits">; def NamedStructuredOpTrait : NativeOpTrait<"linalg::NamedStructuredOpTrait">; @@ -62,6 +67,7 @@ class LinalgStructured_Op props> def CopyOp : LinalgStructured_Op<"copy", [ CopyOpInterface, NInputs<1>, + ZeroInitTensors, NOutputs<1> ]> { let description = [{ @@ -159,7 +165,10 @@ def CopyOp : LinalgStructured_Op<"copy", [ let hasCanonicalizer = 1; } -def FillOp : LinalgStructured_Op<"fill", [NInputs<0>, NOutputs<1>]> { +def FillOp : LinalgStructured_Op<"fill", [ + NInputs<0>, + ZeroInitTensors, + NOutputs<1>]> { let arguments = (ins AnyStridedMemRef:$output, AnyTypeOf<[AnyFloat, AnySignlessInteger, AnyVector]>:$value); @@ -254,7 +263,12 @@ class PoolingBase_Op props> }]; } -def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { +def ConvOp : PoolingBase_Op<"conv", [ + NInputs<2>, + // Despite having reductions, this manually defined ConvOp may only take + // memref operands and can never have init tensors. + ZeroInitTensors, + NOutputs<1>]> { let description = [{ Generic n-D convolution as described in the TF documentation: @@ -371,7 +385,12 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { } class SingleInputPoolingBase_Op - : PoolingBase_Op, NOutputs<1>]> { + : PoolingBase_Op, + // Despite having reductions, this manually defined ConvOp may only take + // memref operands and can never have init tensors. + ZeroInitTensors, + NOutputs<1>]> { let description = [{ A base class for single input pooling function. diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 17e16a15d39a3..23d296c392ff9 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -125,13 +125,12 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { getNumIterators(getReductionIteratorTypeName(), iters) == 1; }]>, //===------------------------------------------------------------------===// - // Num input/output arguments handling. + // Num input/output/initTensors arguments handling. //===------------------------------------------------------------------===// // These special methods must be defined by each op that wants to implement // the LinalgStructuredInterface. For now, this is either: - // - inherited statically by using the NInputs or - // NOutputs traits. - // - derived from args_in/args_out attributes (for linalg.generic and + // - Explicitly specified in the op definition. + // - Derived from variadic attributes (for "named" ops, linalg.generic and // linalg.indexed_generic ops). InterfaceMethod< /*desc=*/[{ @@ -140,6 +139,13 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /*retTy=*/"unsigned", /*methodName=*/"getNumInputs" >, + InterfaceMethod< + /*desc=*/[{ + Return the number of init tensors. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getNumInitTensors" + >, InterfaceMethod< /*desc=*/[{ Return the number of outputs. @@ -371,6 +377,46 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { return {range.begin(), range.begin() + getNumInputsAndOutputBuffers()}; }] >, + InterfaceMethod< + /*desc=*/[{ + Return the range over init tensors. + }], + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getInitTensors", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto range = this->getOperation()->getOperands(); + return {range.begin() + getNumInputsAndOutputBuffers(), + range.begin() + getNumInputsAndOutputs()}; + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return one single init tensor at position `$i`. + }], + /*retTy=*/"Value", + /*methodName=*/"getInitTensor", + /*args=*/(ins "unsigned":$i), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + assert(i < $_op.getNumInitTensors() && "overflowing init tensor index"); + return getInitTensors()[i]; + }] + >, + InterfaceMethod< + /*desc=*/[{ + Return the range over inputs, output buffers and init tensors. + }], + /*retTy=*/"Operation::operand_range", + /*methodName=*/"getShapedOperands", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto range = this->getOperation()->getOperands(); + return {range.begin(), range.begin() + getNumInputsAndOutputs()}; + }] + >, InterfaceMethod< /*desc=*/[{ Return the `i`-th shaped type, there are 3 cases: @@ -445,7 +491,8 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return llvm::to_vector<4>($_op.indexing_maps().template getAsValueRange()); + return llvm::to_vector<4>( + $_op.indexing_maps().template getAsValueRange()); }] >, InterfaceMethod< @@ -528,11 +575,11 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }], /*retTy=*/"Operation *", /*methodName=*/"create", - (ins "OpBuilder &":$builder, "Location":$loc, + (ins "OpBuilder &":$builder, "Location":$loc, "TypeRange":$resultTypes, "ValueRange":$operands, "ArrayRef":$attributes), [{ - return builder.create(loc, TypeRange{}, operands, - attributes); + return builder.create( + loc, resultTypes, operands, attributes); }] >, InterfaceMethod< @@ -542,10 +589,12 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { }], /*retTy=*/"Operation *", /*methodName=*/"clone", - (ins "OpBuilder &":$b, "Location":$loc, "ValueRange":$operands), [{ + (ins "OpBuilder &":$b, "Location":$loc, "TypeRange":$resultTypes, + "ValueRange":$operands), + [{ BlockAndValueMapping map; unsigned numRegions = $_op.getOperation()->getNumRegions(); - Operation *res = create(b, loc, operands, $_op.getAttrs()); + Operation *res = create(b, loc, resultTypes, operands, $_op.getAttrs()); assert(res->getNumRegions() == numRegions && "inconsistent # regions"); for (unsigned ridx = 0; ridx < numRegions; ++ridx) $_op.getOperation()->getRegion(ridx).cloneInto( diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h index 1df2b21bdade6..5f1c756ca446f 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h @@ -35,6 +35,17 @@ template class NInputs { }; }; +/// This class provides the API for ops that are known to not have init tensor +/// operands. Use as a trait as follows: +/// +/// class CopyOp : public Op { +/// +template +class ZeroInitTensors : public TraitBase { +public: + static unsigned getNumInitTensors() { return 0; } +}; + /// This class provides the API for ops that are known to have a specified /// number of outputs, all passed as operands. Use as a trait as follows: /// @@ -87,6 +98,9 @@ class NamedStructuredOpTrait unsigned getNumInputs() { return cast(this->getOperation()).inputs().size(); } + unsigned getNumInitTensors() { + return cast(this->getOperation()).init_tensors().size(); + } unsigned getNumOutputs() { ConcreteType concreteOp = cast(this->getOperation()); return concreteOp.output_buffers().size() + diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 04d417480f3bf..dfc977daa2071 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -99,7 +99,7 @@ static LinalgOp cloneWithLoopRanges(OpBuilder &b, Location loc, LinalgOp op, auto operands = getAssumedNonViewOperands(op); clonedViews.append(operands.begin(), operands.end()); - Operation *clonedOp = op.clone(b, loc, clonedViews); + Operation *clonedOp = op.clone(b, loc, /*resultTypes*/ {}, clonedViews); // When the producer is an IndexedGenercOp, we have to transform its block // IV arguments according to the tiling of the consumer, i.e. offset them by // the values computed in `loopRanges`. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 676caa145c3a2..3db801bc2d575 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -405,7 +405,7 @@ Optional static tileLinalgOpImpl( tileSizes, allViewSizes); auto operands = getAssumedNonViewOperands(op); views.append(operands.begin(), operands.end()); - res = op.clone(b, loc, views); + res = op.clone(b, loc, /*resultTypes*/ {}, views); return scf::ValueVector{}; }, options.distribution); From ecc997807180a6e763f12e3d011f6b887db0d6a9 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 29 Sep 2020 09:56:54 -0400 Subject: [PATCH 022/544] [mlir][openacc] Add update operation This patch introduce the update operation that represent the OpenACC update directive. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D88102 --- .../mlir/Dialect/OpenACC/OpenACCOps.td | 40 +++++++++++++++++++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 27 +++++++++++++ mlir/test/Dialect/OpenACC/invalid.mlir | 23 +++++++++++ mlir/test/Dialect/OpenACC/ops.mlir | 30 ++++++++++++++ 4 files changed, 120 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 85499834e5a26..862a35718f065 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -318,4 +318,44 @@ def OpenACC_YieldOp : OpenACC_Op<"yield", [Terminator, let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; } +//===----------------------------------------------------------------------===// +// 2.14.4. Update Directive +//===----------------------------------------------------------------------===// + +def OpenACC_UpdateOp : OpenACC_Op<"update", [AttrSizedOperandSegments]> { + let summary = "update operation"; + + let description = [{ + The "acc.udpate" operation represents the OpenACC update executable + directive. + As host and self clauses are synonyms, any operands for host and self are + add to $hostOperands. + + Example: + + ```mlir + acc.update device(%d1 : memref<10xf32>) attributes {async} + ``` + }]; + + let arguments = (ins Optional:$asyncOperand, + Optional:$waitDevnum, + Variadic:$waitOperands, + UnitAttr:$async, + UnitAttr:$wait, + Variadic:$hostOperands, + Variadic:$deviceOperands, + UnitAttr:$ifPresent); + + let assemblyFormat = [{ + ( `async` `(` $asyncOperand^ `:` type($asyncOperand) `)` )? + ( `wait_devnum` `(` $waitDevnum^ `:` type($waitDevnum) `)` )? + ( `wait` `(` $waitOperands^ `:` type($waitOperands) `)` )? + ( `host` `(` $hostOperands^ `:` type($hostOperands) `)` )? + ( `device` `(` $deviceOperands^ `:` type($deviceOperands) `)` )? + attr-dict-with-keyword + }]; +} + + #endif // OPENACC_OPS diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index ca2acca974e87..46df60532e1ad 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -645,6 +645,33 @@ static LogicalResult verify(acc::DataOp dataOp) { if (dataOp.getOperands().size() == 0 && !dataOp.defaultAttr()) return dataOp.emitError("at least one operand or the default attribute " "must appear on the data operation"); + return success(); +} + +//===----------------------------------------------------------------------===// +// UpdateOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(acc::UpdateOp updateOp) { + // At least one of host or device should have a value. + if (updateOp.hostOperands().size() == 0 && + updateOp.deviceOperands().size() == 0) + return updateOp.emitError("at least one value must be present in" + " hostOperands or deviceOperands"); + + // The async attribute represent the async clause without value. Therefore the + // attribute and operand cannot appear at the same time. + if (updateOp.asyncOperand() && updateOp.async()) + return updateOp.emitError("async attribute cannot appear with " + " asyncOperand"); + + // The wait attribute represent the wait clause without values. Therefore the + // attribute and operands cannot appear at the same time. + if (updateOp.waitOperands().size() > 0 && updateOp.wait()) + return updateOp.emitError("wait attribute cannot appear with waitOperands"); + + if (updateOp.waitDevnum() && updateOp.waitOperands().size() == 0) + return updateOp.emitError("wait_devnum cannot appear without waitOperands"); return success(); } diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 22345d279f0d9..c694fc5361cf5 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -75,3 +75,26 @@ acc.data { } // ----- +// expected-error@+1 {{at least one value must be present in hostOperands or deviceOperands}} +acc.update + +// ----- + +%cst = constant 1 : index +%value = alloc() : memref<10xf32> +// expected-error@+1 {{wait_devnum cannot appear without waitOperands}} +acc.update wait_devnum(%cst: index) host(%value: memref<10xf32>) + +// ----- + +%cst = constant 1 : index +%value = alloc() : memref<10xf32> +// expected-error@+1 {{async attribute cannot appear with asyncOperand}} +acc.update async(%cst: index) host(%value: memref<10xf32>) attributes {async} + +// ----- + +%cst = constant 1 : index +%value = alloc() : memref<10xf32> +// expected-error@+1 {{wait attribute cannot appear with waitOperands}} +acc.update wait(%cst: index) host(%value: memref<10xf32>) attributes {wait} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index a4fecf619a77f..c383d067f285a 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -524,3 +524,33 @@ func @testdataop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) // CHECK-NEXT: } attributes {defaultAttr = "present"} // CHECK: acc.data { // CHECK-NEXT: } attributes {defaultAttr = "none"} + +// ----- + +func @testupdateop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { + %i64Value = constant 1 : i64 + %i32Value = constant 1 : i32 + %idxValue = constant 1 : index + acc.update async(%i64Value: i64) host(%a: memref<10xf32>) + acc.update async(%i32Value: i32) host(%a: memref<10xf32>) + acc.update async(%idxValue: index) host(%a: memref<10xf32>) + acc.update wait_devnum(%i64Value: i64) wait(%i32Value, %idxValue : i32, index) host(%a: memref<10xf32>) + acc.update host(%a: memref<10xf32>) device(%b, %c : memref<10xf32>, memref<10x10xf32>) + acc.update host(%a: memref<10xf32>) device(%b, %c : memref<10xf32>, memref<10x10xf32>) attributes {async} + acc.update host(%a: memref<10xf32>) device(%b, %c : memref<10xf32>, memref<10x10xf32>) attributes {wait} + acc.update host(%a: memref<10xf32>) device(%b, %c : memref<10xf32>, memref<10x10xf32>) attributes {ifPresent} + return +} + +// CHECK: func @testupdateop([[ARGA:%.*]]: memref<10xf32>, [[ARGB:%.*]]: memref<10xf32>, [[ARGC:%.*]]: memref<10x10xf32>) { +// CHECK: [[I64VALUE:%.*]] = constant 1 : i64 +// CHECK: [[I32VALUE:%.*]] = constant 1 : i32 +// CHECK: [[IDXVALUE:%.*]] = constant 1 : index +// CHECK: acc.update async([[I64VALUE]] : i64) host([[ARGA]] : memref<10xf32>) +// CHECK: acc.update async([[I32VALUE]] : i32) host([[ARGA]] : memref<10xf32>) +// CHECK: acc.update async([[IDXVALUE]] : index) host([[ARGA]] : memref<10xf32>) +// CHECK: acc.update wait_devnum([[I64VALUE]] : i64) wait([[I32VALUE]], [[IDXVALUE]] : i32, index) host([[ARGA]] : memref<10xf32>) +// CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) +// CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) attributes {async} +// CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) attributes {wait} +// CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) attributes {ifPresent} From 14ff38e235c4aec8e444d8aec26ce5d3a4c524d2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 14:45:30 +0100 Subject: [PATCH 023/544] [InstCombine] visitTrunc - trunc (lshr (sext A), C) --> (ashr A, C) non-uniform support This came from @lebedev.ri's suggestion to use m_SpecificInt_ICMP for D88429 - since I was going to change the m_APInt to m_Constant for that patch I thought I would do it for the only other user of the APInt first. I've added a ConstantExpr::getUMin helper - its trivial to add UMAX/SMIN/SMAX but thought I'd wait until we have use cases. Differential Revision: https://reviews.llvm.org/D88475 --- llvm/include/llvm/IR/Constants.h | 1 + llvm/lib/IR/Constants.cpp | 5 +++++ .../InstCombine/InstCombineCasts.cpp | 19 +++++++++++++------ llvm/test/Transforms/InstCombine/cast.ll | 12 +++--------- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 8e2dba9b2417c..6763d04a53e97 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -959,6 +959,7 @@ class ConstantExpr : public Constant { static Constant *getAnd(Constant *C1, Constant *C2); static Constant *getOr(Constant *C1, Constant *C2); static Constant *getXor(Constant *C1, Constant *C2); + static Constant *getUMin(Constant *C1, Constant *C2); static Constant *getShl(Constant *C1, Constant *C2, bool HasNUW = false, bool HasNSW = false); static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false); diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index d84c7bc2da9db..83745b07cdd53 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2560,6 +2560,11 @@ Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) { return get(Instruction::Xor, C1, C2); } +Constant *ConstantExpr::getUMin(Constant *C1, Constant *C2) { + Constant *Cmp = ConstantExpr::getICmp(CmpInst::ICMP_ULT, C1, C2); + return getSelect(Cmp, C1, C2); +} + Constant *ConstantExpr::getShl(Constant *C1, Constant *C2, bool HasNUW, bool HasNSW) { unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) | diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 5982d48e6bf61..ca55c8f5a887b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -827,23 +827,30 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { return CastInst::CreateIntegerCast(Shift, DestTy, false); } - const APInt *C; - if (match(Src, m_LShr(m_SExt(m_Value(A)), m_APInt(C)))) { + Constant *C; + if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) { unsigned AWidth = A->getType()->getScalarSizeInBits(); unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth); // If the shift is small enough, all zero bits created by the shift are // removed by the trunc. - if (C->getZExtValue() <= MaxShiftAmt) { + // TODO: Support passing through undef shift amounts - these currently get + // clamped to MaxAmt. + if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, + APInt(SrcWidth, MaxShiftAmt)))) { // trunc (lshr (sext A), C) --> ashr A, C if (A->getType() == DestTy) { - unsigned ShAmt = std::min((unsigned)C->getZExtValue(), DestWidth - 1); - return BinaryOperator::CreateAShr(A, ConstantInt::get(DestTy, ShAmt)); + Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false); + Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); + ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); + return BinaryOperator::CreateAShr(A, ShAmt); } // The types are mismatched, so create a cast after shifting: // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C) if (Src->hasOneUse()) { - unsigned ShAmt = std::min((unsigned)C->getZExtValue(), AWidth - 1); + Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false); + Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); + ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); Value *Shift = Builder.CreateAShr(A, ShAmt); return CastInst::CreateIntegerCast(Shift, DestTy, true); } diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index ad6d22aa06e43..1d3d006ad2382 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1559,9 +1559,7 @@ define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) { define <2 x i8> @trunc_lshr_sext_uniform_undef(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uniform_undef( -; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1572,9 +1570,7 @@ define <2 x i8> @trunc_lshr_sext_uniform_undef(<2 x i8> %A) { define <2 x i8> @trunc_lshr_sext_nonuniform(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_nonuniform( -; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1585,9 +1581,7 @@ define <2 x i8> @trunc_lshr_sext_nonuniform(<2 x i8> %A) { define <3 x i8> @trunc_lshr_sext_nonuniform_undef(<3 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_nonuniform_undef( -; ALL-NEXT: [[B:%.*]] = sext <3 x i8> [[A:%.*]] to <3 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <3 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = trunc <3 x i32> [[C]] to <3 x i8> +; ALL-NEXT: [[D:%.*]] = ashr <3 x i8> [[A:%.*]], ; ALL-NEXT: ret <3 x i8> [[D]] ; %B = sext <3 x i8> %A to <3 x i32> From db04bec5f1eeb581ee1470e5f444cc7b918c6d93 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 24 Sep 2020 18:59:02 -0500 Subject: [PATCH 024/544] [SDAG] Do not convert undef to 0 when folding CONCAT/BUILD_VECTOR Differential Revision: https://reviews.llvm.org/D88273 --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 ++++--- .../Hexagon/autohvx/isel-undef-not-zero.ll | 32 ++++++++++++++ .../test/CodeGen/X86/vec-strict-cmp-sub128.ll | 44 ++++++++----------- 3 files changed, 60 insertions(+), 31 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index cfb4aa2f0bb53..b9362f1e762d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4370,11 +4370,16 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, for (SDValue Op : Elts) SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); - if (SVT.bitsGT(VT.getScalarType())) - for (SDValue &Op : Elts) - Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, DL, SVT) - : DAG.getSExtOrTrunc(Op, DL, SVT); + if (SVT.bitsGT(VT.getScalarType())) { + for (SDValue &Op : Elts) { + if (Op.isUndef()) + Op = DAG.getUNDEF(SVT); + else + Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, DL, SVT) + : DAG.getSExtOrTrunc(Op, DL, SVT); + } + } SDValue V = DAG.getBuildVector(VT, DL, Elts); NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll new file mode 100644 index 0000000000000..f8f0a7211a63d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; Check that we don't generate lots of vinserts (of 0 that should be undef). +; CHECK: vinsert +; CHECK: vinsert +; CHECK-NOT: vinsert + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @f0(i8* noalias align 128 %a0) #0 { +b0: + %v0 = bitcast i8* %a0 to i32* + %v1 = getelementptr inbounds i32, i32* %v0, i32 undef + %v2 = bitcast i32* %v1 to <7 x i32>* + br label %b1 + +b1: ; preds = %b0 + %v3 = load i8, i8* undef, align 1 + %v4 = insertelement <7 x i8> undef, i8 %v3, i32 0 + %v5 = shufflevector <7 x i8> %v4, <7 x i8> undef, <7 x i32> zeroinitializer + %v6 = zext <7 x i8> %v5 to <7 x i32> + %v7 = load <7 x i8>, <7 x i8>* undef, align 1 + %v8 = zext <7 x i8> %v7 to <7 x i32> + %v9 = mul nsw <7 x i32> %v6, %v8 + %v10 = add nsw <7 x i32> %v9, zeroinitializer + store <7 x i32> %v10, <7 x i32>* %v2, align 4 + ret void +} + +attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" } + diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll index 97997018cff9d..da8dbe32e8406 100644 --- a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll @@ -101,7 +101,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512-32-NEXT: kmovw %eax, %k0 ; AVX512-32-NEXT: vcomiss 8(%ebp), %xmm2 ; AVX512-32-NEXT: seta %al -; AVX512-32-NEXT: andl $1, %eax ; AVX512-32-NEXT: kmovw %eax, %k1 ; AVX512-32-NEXT: kandw %k0, %k1, %k0 ; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] @@ -122,7 +121,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512-64-NEXT: kmovw %eax, %k0 ; AVX512-64-NEXT: vcomiss %xmm3, %xmm2 ; AVX512-64-NEXT: seta %al -; AVX512-64-NEXT: andl $1, %eax ; AVX512-64-NEXT: kmovw %eax, %k1 ; AVX512-64-NEXT: kandw %k0, %k1, %k0 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] @@ -148,7 +146,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512F-32-NEXT: kmovw %eax, %k0 ; AVX512F-32-NEXT: vcomiss 8(%ebp), %xmm2 ; AVX512F-32-NEXT: seta %al -; AVX512F-32-NEXT: andl $1, %eax ; AVX512F-32-NEXT: kmovw %eax, %k1 ; AVX512F-32-NEXT: kandw %k0, %k1, %k0 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] @@ -173,7 +170,6 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512F-64-NEXT: kmovw %eax, %k0 ; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2 ; AVX512F-64-NEXT: seta %al -; AVX512F-64-NEXT: andl $1, %eax ; AVX512F-64-NEXT: kmovw %eax, %k1 ; AVX512F-64-NEXT: kandw %k0, %k1, %k0 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] @@ -299,7 +295,6 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512-32-NEXT: sete %cl ; AVX512-32-NEXT: testb %al, %cl ; AVX512-32-NEXT: setne %al -; AVX512-32-NEXT: andl $1, %eax ; AVX512-32-NEXT: kmovw %eax, %k1 ; AVX512-32-NEXT: kandw %k0, %k1, %k0 ; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] @@ -319,27 +314,26 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; ; AVX512-64-LABEL: test_v2f32_oeq_q: ; AVX512-64: # %bb.0: -; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3] -; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] -; AVX512-64-NEXT: vucomiss %xmm4, %xmm5 +; AVX512-64-NEXT: vucomiss %xmm3, %xmm2 ; AVX512-64-NEXT: setnp %al ; AVX512-64-NEXT: sete %cl ; AVX512-64-NEXT: testb %al, %cl ; AVX512-64-NEXT: setne %al ; AVX512-64-NEXT: kmovw %eax, %k0 -; AVX512-64-NEXT: kshiftlw $15, %k0, %k0 -; AVX512-64-NEXT: kshiftrw $14, %k0, %k0 +; AVX512-64-NEXT: movw $-3, %ax +; AVX512-64-NEXT: kmovw %eax, %k1 +; AVX512-64-NEXT: kandw %k1, %k0, %k0 +; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] +; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512-64-NEXT: vucomiss %xmm3, %xmm2 ; AVX512-64-NEXT: setnp %al ; AVX512-64-NEXT: sete %cl ; AVX512-64-NEXT: testb %al, %cl ; AVX512-64-NEXT: setne %al -; AVX512-64-NEXT: andl $1, %eax ; AVX512-64-NEXT: kmovw %eax, %k1 -; AVX512-64-NEXT: movw $-3, %ax -; AVX512-64-NEXT: kmovw %eax, %k2 -; AVX512-64-NEXT: kandw %k2, %k1, %k1 -; AVX512-64-NEXT: korw %k0, %k1, %k1 +; AVX512-64-NEXT: kshiftlw $15, %k1, %k1 +; AVX512-64-NEXT: kshiftrw $14, %k1, %k1 +; AVX512-64-NEXT: korw %k1, %k0, %k1 ; AVX512-64-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; AVX512-64-NEXT: retq ; @@ -358,7 +352,6 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512F-32-NEXT: sete %cl ; AVX512F-32-NEXT: testb %al, %cl ; AVX512F-32-NEXT: setne %al -; AVX512F-32-NEXT: andl $1, %eax ; AVX512F-32-NEXT: kmovw %eax, %k1 ; AVX512F-32-NEXT: kandw %k0, %k1, %k0 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] @@ -382,27 +375,26 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1, ; AVX512F-64: # %bb.0: ; AVX512F-64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3] -; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] -; AVX512F-64-NEXT: vucomiss %xmm4, %xmm5 +; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2 ; AVX512F-64-NEXT: setnp %al ; AVX512F-64-NEXT: sete %cl ; AVX512F-64-NEXT: testb %al, %cl ; AVX512F-64-NEXT: setne %al ; AVX512F-64-NEXT: kmovw %eax, %k0 -; AVX512F-64-NEXT: kshiftlw $15, %k0, %k0 -; AVX512F-64-NEXT: kshiftrw $14, %k0, %k0 +; AVX512F-64-NEXT: movw $-3, %ax +; AVX512F-64-NEXT: kmovw %eax, %k1 +; AVX512F-64-NEXT: kandw %k1, %k0, %k0 +; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] +; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2 ; AVX512F-64-NEXT: setnp %al ; AVX512F-64-NEXT: sete %cl ; AVX512F-64-NEXT: testb %al, %cl ; AVX512F-64-NEXT: setne %al -; AVX512F-64-NEXT: andl $1, %eax ; AVX512F-64-NEXT: kmovw %eax, %k1 -; AVX512F-64-NEXT: movw $-3, %ax -; AVX512F-64-NEXT: kmovw %eax, %k2 -; AVX512F-64-NEXT: kandw %k2, %k1, %k1 -; AVX512F-64-NEXT: korw %k0, %k1, %k1 +; AVX512F-64-NEXT: kshiftlw $15, %k1, %k1 +; AVX512F-64-NEXT: kshiftrw $14, %k1, %k1 +; AVX512F-64-NEXT: korw %k1, %k0, %k1 ; AVX512F-64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-64-NEXT: vzeroupper From d9ee935679e7164d1c47e351bbbcf5c25742b59c Mon Sep 17 00:00:00 2001 From: Chris Hamilton Date: Tue, 29 Sep 2020 16:11:41 +0200 Subject: [PATCH 025/544] [Sema] Address-space sensitive check for unbounded arrays (v2) Check applied to unbounded (incomplete) arrays and pointers to spot cases where the computed address is beyond the largest possible addressable extent of the array, based on the address space in which the array is delcared, or which the pointer refers to. Check helps to avoid cases of nonsense pointer math and array indexing which could lead to linker failures or runtime exceptions. Of particular interest when building for embedded systems with small address spaces. This is version 2 of this patch -- version 1 had some testing issues due to a sign error in existing code. That error is corrected and lit test for this chagne is extended to verify the fix. Originally reviewed/accepted by: aaron.ballman Original revision: https://reviews.llvm.org/D86796 Reviewed By: ebevhan Differential Revision: https://reviews.llvm.org/D88174 --- .../clang/Basic/DiagnosticSemaKinds.td | 8 ++ clang/lib/Sema/SemaChecking.cpp | 89 ++++++++++++++++--- clang/test/Sema/const-eval.c | 8 +- clang/test/Sema/unbounded-array-bounds.c | 80 +++++++++++++++++ .../SemaCXX/constant-expression-cxx1y.cpp | 3 +- 5 files changed, 170 insertions(+), 18 deletions(-) create mode 100644 clang/test/Sema/unbounded-array-bounds.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 42d50426ccd84..8f6c7b9400fae 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8917,6 +8917,14 @@ def warn_array_index_precedes_bounds : Warning< def warn_array_index_exceeds_bounds : Warning< "array index %0 is past the end of the array (which contains %1 " "element%s2)">, InGroup; +def warn_ptr_arith_exceeds_max_addressable_bounds : Warning< + "the pointer incremented by %0 refers past the last possible element for an array in %1-bit " + "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, + InGroup; +def warn_array_index_exceeds_max_addressable_bounds : Warning< + "array index %0 refers past the last possible element for an array in %1-bit " + "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, + InGroup; def note_array_declared_here : Note< "array %0 declared here">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index eeb3222624005..a5de6a5c88db9 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14057,11 +14057,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, const ConstantArrayType *ArrayTy = Context.getAsConstantArrayType(BaseExpr->getType()); - if (!ArrayTy) - return; - - const Type *BaseType = ArrayTy->getElementType().getTypePtr(); - if (EffectiveType->isDependentType() || BaseType->isDependentType()) + const Type *BaseType = + ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr(); + bool IsUnboundedArray = (BaseType == nullptr); + if (EffectiveType->isDependentType() || + (!IsUnboundedArray && BaseType->isDependentType())) return; Expr::EvalResult Result; @@ -14069,8 +14069,10 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, return; llvm::APSInt index = Result.Val.getInt(); - if (IndexNegated) + if (IndexNegated) { + index.setIsUnsigned(false); index = -index; + } const NamedDecl *ND = nullptr; if (const DeclRefExpr *DRE = dyn_cast(BaseExpr)) @@ -14078,6 +14080,69 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (const MemberExpr *ME = dyn_cast(BaseExpr)) ND = ME->getMemberDecl(); + if (IsUnboundedArray) { + if (index.isUnsigned() || !index.isNegative()) { + const auto &ASTC = getASTContext(); + unsigned AddrBits = + ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace( + EffectiveType->getCanonicalTypeInternal())); + if (index.getBitWidth() < AddrBits) + index = index.zext(AddrBits); + CharUnits ElemCharUnits = ASTC.getTypeSizeInChars(EffectiveType); + llvm::APInt ElemBytes(index.getBitWidth(), ElemCharUnits.getQuantity()); + // If index has more active bits than address space, we already know + // we have a bounds violation to warn about. Otherwise, compute + // address of (index + 1)th element, and warn about bounds violation + // only if that address exceeds address space. + if (index.getActiveBits() <= AddrBits) { + bool Overflow; + llvm::APInt Product(index); + Product += 1; + Product = Product.umul_ov(ElemBytes, Overflow); + if (!Overflow && Product.getActiveBits() <= AddrBits) + return; + } + + // Need to compute max possible elements in address space, since that + // is included in diag message. + llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits); + MaxElems = MaxElems.zext(std::max(AddrBits + 1, ElemBytes.getBitWidth())); + MaxElems += 1; + ElemBytes = ElemBytes.zextOrTrunc(MaxElems.getBitWidth()); + MaxElems = MaxElems.udiv(ElemBytes); + + unsigned DiagID = + ASE ? diag::warn_array_index_exceeds_max_addressable_bounds + : diag::warn_ptr_arith_exceeds_max_addressable_bounds; + + // Diag message shows element size in bits and in "bytes" (platform- + // dependent CharUnits) + DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr, + PDiag(DiagID) + << index.toString(10, true) << AddrBits + << (unsigned)ASTC.toBits(ElemCharUnits) + << ElemBytes.toString(10, false) + << MaxElems.toString(10, false) + << (unsigned)MaxElems.getLimitedValue(~0U) + << IndexExpr->getSourceRange()); + + if (!ND) { + // Try harder to find a NamedDecl to point at in the note. + while (const auto *ASE = dyn_cast(BaseExpr)) + BaseExpr = ASE->getBase()->IgnoreParenCasts(); + if (const auto *DRE = dyn_cast(BaseExpr)) + ND = DRE->getDecl(); + if (const auto *ME = dyn_cast(BaseExpr)) + ND = ME->getMemberDecl(); + } + + if (ND) + DiagRuntimeBehavior(ND->getBeginLoc(), BaseExpr, + PDiag(diag::note_array_declared_here) << ND); + } + return; + } + if (index.isUnsigned() || !index.isNegative()) { // It is possible that the type of the base expression after // IgnoreParenCasts is incomplete, even though the type of the base @@ -14140,9 +14205,8 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, } } - unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds; - if (ASE) - DiagID = diag::warn_array_index_exceeds_bounds; + unsigned DiagID = ASE ? diag::warn_array_index_exceeds_bounds + : diag::warn_ptr_arith_exceeds_bounds; DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr, PDiag(DiagID) << index.toString(10, true) @@ -14163,12 +14227,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (!ND) { // Try harder to find a NamedDecl to point at in the note. - while (const ArraySubscriptExpr *ASE = - dyn_cast(BaseExpr)) + while (const auto *ASE = dyn_cast(BaseExpr)) BaseExpr = ASE->getBase()->IgnoreParenCasts(); - if (const DeclRefExpr *DRE = dyn_cast(BaseExpr)) + if (const auto *DRE = dyn_cast(BaseExpr)) ND = DRE->getDecl(); - if (const MemberExpr *ME = dyn_cast(BaseExpr)) + if (const auto *ME = dyn_cast(BaseExpr)) ND = ME->getMemberDecl(); } diff --git a/clang/test/Sema/const-eval.c b/clang/test/Sema/const-eval.c index bbcbb0e25237e..c94539ab1de27 100644 --- a/clang/test/Sema/const-eval.c +++ b/clang/test/Sema/const-eval.c @@ -140,10 +140,10 @@ EVAL_EXPR(52, &pr24622 == (void *)&PR24622); // expected-error {{must have a con // We evaluate these by providing 2s' complement semantics in constant // expressions, like we do for integers. -void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; -void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; -__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; -void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; +void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; // expected-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}} +void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; // expected-warning {{refers past the last possible element}} +__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}} +void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; // expected-warning {{refers past the last possible element}} struct PR35214_X { int k; diff --git a/clang/test/Sema/unbounded-array-bounds.c b/clang/test/Sema/unbounded-array-bounds.c new file mode 100644 index 0000000000000..d47463ff94345 --- /dev/null +++ b/clang/test/Sema/unbounded-array-bounds.c @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s \ +// RUN: --implicit-check-not 'past the last possible element' +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \ +// RUN: --implicit-check-not 'past the last possible element' +// RUN: %clang_cc1 -triple avr-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s \ +// RUN: --implicit-check-not 'past the last possible element' + +struct S { + long long a; + char b; + long long c; + short d; +}; + +struct S s[]; + +void f1() { + ++s[3].a; + ++s[7073650413200313099].b; + // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: array index 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) + ++s[7073650].c; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) +} + +long long ll[]; + +void f2() { + ++ll[3]; + ++ll[2705843009213693952]; + // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) + ++ll[847073650]; + // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) + // CHECK-AVR-ADDR16: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) +} + +void f3(struct S p[]) { + ++p[3].a; + ++p[7073650413200313099].b; + // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) + ++p[7073650].c; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) +} + +void f4(struct S *p) { + p += 3; + p += 7073650413200313099; + // CHECK-X86-ADDR64: :[[@LINE-1]]:3: warning: the pointer incremented by 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) + // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) + // CHECK-AVR-ADDR16: :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) + p += 7073650; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) +} + +struct BQ { + struct S bigblock[3276]; +}; + +struct BQ bq[]; + +void f5() { + ++bq[0].bigblock[0].a; + ++bq[1].bigblock[0].a; + // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element) +} + +void f6() { + int ints[] = {1, 3, 5, 7, 8, 6, 4, 5, 9}; + int const n_ints = sizeof(ints) / sizeof(int); + unsigned long long const N = 3; + + int *middle = &ints[0] + n_ints / 2; + // Should NOT produce a warning. + *(middle + 5 - N) = 22; +} diff --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp index 8bc4f88a63a96..7fe71d4853508 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp @@ -1018,8 +1018,9 @@ constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant } constexpr void PR28739(int n) { // expected-error {{never produces a constant}} - int *p = &n; + int *p = &n; // expected-note {{declared here}} p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} + // expected-warning@-1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}} } constexpr void Void(int n) { From 042f22bda5d3e2851205781f0b921cc810bb6dcb Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 15:05:30 +0100 Subject: [PATCH 026/544] [InstCombine] Add exact shift tests missed in D88475 I missed the post-LGTM comment from @lebedev.ri --- llvm/test/Transforms/InstCombine/cast.ll | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 1d3d006ad2382..db6b550f5faa6 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1546,6 +1546,17 @@ define i8 @trunc_lshr_sext(i8 %A) { ret i8 %D } +define i8 @trunc_lshr_sext_exact(i8 %A) { +; ALL-LABEL: @trunc_lshr_sext_exact( +; ALL-NEXT: [[D:%.*]] = ashr i8 [[A:%.*]], 6 +; ALL-NEXT: ret i8 [[D]] +; + %B = sext i8 %A to i32 + %C = lshr exact i32 %B, 6 + %D = trunc i32 %C to i8 + ret i8 %D +} + define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uniform( ; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], @@ -1705,6 +1716,18 @@ define i8 @trunc_lshr_sext_wide_input(i16 %A) { ret i8 %D } +define i8 @trunc_lshr_sext_wide_input_exact(i16 %A) { +; ALL-LABEL: @trunc_lshr_sext_wide_input_exact( +; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[A:%.*]], 9 +; ALL-NEXT: [[D:%.*]] = trunc i16 [[TMP1]] to i8 +; ALL-NEXT: ret i8 [[D]] +; + %B = sext i16 %A to i32 + %C = lshr exact i32 %B, 9 + %D = trunc i32 %C to i8 + ret i8 %D +} + define <2 x i8> @trunc_lshr_sext_wide_input_uses1(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> From 64c0c9f01511dc300b29e7a20a13958c5932e314 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 29 Sep 2020 16:23:02 +0200 Subject: [PATCH 027/544] [mlir] Expose Dialect class and registration/loading to C API - Add a minimalist C API for mlir::Dialect. - Allow one to query the context about registered and loaded dialects. - Add API for loading dialects. - Provide functions to register the Standard dialect. When used naively, this will require to separately register each dialect. When we have more than one exposed, we can add variadic macros that expand to individual calls. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D88162 --- mlir/include/mlir-c/IR.h | 36 +++++++++++++++++ mlir/include/mlir-c/StandardDialect.h | 42 ++++++++++++++++++++ mlir/include/mlir/CAPI/IR.h | 1 + mlir/lib/CAPI/CMakeLists.txt | 1 + mlir/lib/CAPI/IR/IR.cpp | 36 +++++++++++++++++ mlir/lib/CAPI/Standard/CMakeLists.txt | 11 ++++++ mlir/lib/CAPI/Standard/StandardDialect.cpp | 25 ++++++++++++ mlir/test/CAPI/CMakeLists.txt | 1 + mlir/test/CAPI/ir.c | 45 ++++++++++++++++++++++ 9 files changed, 198 insertions(+) create mode 100644 mlir/include/mlir-c/StandardDialect.h create mode 100644 mlir/lib/CAPI/Standard/CMakeLists.txt create mode 100644 mlir/lib/CAPI/Standard/StandardDialect.cpp diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index 4aca261868f3a..82149c7fce06b 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -20,6 +20,8 @@ #include +#include "mlir-c/Support.h" + #ifdef __cplusplus extern "C" { #endif @@ -46,6 +48,7 @@ extern "C" { typedef struct name name DEFINE_C_API_STRUCT(MlirContext, void); +DEFINE_C_API_STRUCT(MlirDialect, void); DEFINE_C_API_STRUCT(MlirOperation, void); DEFINE_C_API_STRUCT(MlirBlock, void); DEFINE_C_API_STRUCT(MlirRegion, void); @@ -97,6 +100,39 @@ void mlirContextSetAllowUnregisteredDialects(MlirContext context, int allow); /** Returns whether the context allows unregistered dialects. */ int mlirContextGetAllowUnregisteredDialects(MlirContext context); +/** Returns the number of dialects registered with the given context. A + * registered dialect will be loaded if needed by the parser. */ +intptr_t mlirContextGetNumRegisteredDialects(MlirContext context); + +/** Returns the number of dialects loaded by the context. + */ +intptr_t mlirContextGetNumLoadedDialects(MlirContext context); + +/** Gets the dialect instance owned by the given context using the dialect + * namespace to identify it, loads (i.e., constructs the instance of) the + * dialect if necessary. If the dialect is not registered with the context, + * returns null. Use mlirContextLoadDialect to load an unregistered + * dialect. */ +MlirDialect mlirContextGetOrLoadDialect(MlirContext context, + MlirStringRef name); + +/*============================================================================*/ +/* Dialect API. */ +/*============================================================================*/ + +/** Returns the context that owns the dialect. */ +MlirContext mlirDialectGetContext(MlirDialect dialect); + +/** Checks if the dialect is null. */ +int mlirDialectIsNull(MlirDialect dialect); + +/** Checks if two dialects that belong to the same context are equal. Dialects + * from different contexts will not compare equal. */ +int mlirDialectEqual(MlirDialect dialect1, MlirDialect dialect2); + +/** Returns the namespace of the given dialect. */ +MlirStringRef mlirDialectGetNamespace(MlirDialect dialect); + /*============================================================================*/ /* Location API. */ /*============================================================================*/ diff --git a/mlir/include/mlir-c/StandardDialect.h b/mlir/include/mlir-c/StandardDialect.h new file mode 100644 index 0000000000000..946d14859d5d3 --- /dev/null +++ b/mlir/include/mlir-c/StandardDialect.h @@ -0,0 +1,42 @@ +/*===-- mlir-c/StandardDialect.h - C API for Standard dialect -----*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface for registering and accessing the *| +|* Standard dialect. A dialect should be registered with a context to make it *| +|* available to users of the context. These users must load the dialect *| +|* before using any of its attributes, operations or types. Parser and pass *| +|* manager can load registered dialects automatically. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef MLIR_C_STANDARDDIALECT_H +#define MLIR_C_STANDARDDIALECT_H + +#include "mlir-c/IR.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Registers the Standard dialect with the given context. This allows the + * dialect to be loaded dynamically if needed when parsing. */ +void mlirContextRegisterStandardDialect(MlirContext context); + +/** Loads the Standard dialect into the given context. The dialect does _not_ + * have to be registered in advance. */ +MlirDialect mlirContextLoadStandardDialect(MlirContext context); + +/** Returns the namespace of the Standard dialect, suitable for loading it. */ +MlirStringRef mlirStandardDialectGetNamespace(); + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_STANDARDDIALECT_H diff --git a/mlir/include/mlir/CAPI/IR.h b/mlir/include/mlir/CAPI/IR.h index 9a60ecf04fc89..dce293d05588d 100644 --- a/mlir/include/mlir/CAPI/IR.h +++ b/mlir/include/mlir/CAPI/IR.h @@ -21,6 +21,7 @@ #include "mlir/IR/Operation.h" DEFINE_C_API_PTR_METHODS(MlirContext, mlir::MLIRContext) +DEFINE_C_API_PTR_METHODS(MlirDialect, mlir::Dialect) DEFINE_C_API_PTR_METHODS(MlirOperation, mlir::Operation) DEFINE_C_API_PTR_METHODS(MlirBlock, mlir::Block) DEFINE_C_API_PTR_METHODS(MlirRegion, mlir::Region) diff --git a/mlir/lib/CAPI/CMakeLists.txt b/mlir/lib/CAPI/CMakeLists.txt index 79d472b2d026b..b9d2c4601b98b 100644 --- a/mlir/lib/CAPI/CMakeLists.txt +++ b/mlir/lib/CAPI/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Registration) +add_subdirectory(Standard) diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp index 3b99f8ac47486..359ee69708eb7 100644 --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "mlir-c/IR.h" +#include "mlir-c/Support.h" #include "mlir/CAPI/IR.h" +#include "mlir/CAPI/Support.h" #include "mlir/CAPI/Utils.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Dialect.h" @@ -41,6 +43,40 @@ void mlirContextSetAllowUnregisteredDialects(MlirContext context, int allow) { int mlirContextGetAllowUnregisteredDialects(MlirContext context) { return unwrap(context)->allowsUnregisteredDialects(); } +intptr_t mlirContextGetNumRegisteredDialects(MlirContext context) { + return static_cast(unwrap(context)->getAvailableDialects().size()); +} + +// TODO: expose a cheaper way than constructing + sorting a vector only to take +// its size. +intptr_t mlirContextGetNumLoadedDialects(MlirContext context) { + return static_cast(unwrap(context)->getLoadedDialects().size()); +} + +MlirDialect mlirContextGetOrLoadDialect(MlirContext context, + MlirStringRef name) { + return wrap(unwrap(context)->getOrLoadDialect(unwrap(name))); +} + +/* ========================================================================== */ +/* Dialect API. */ +/* ========================================================================== */ + +MlirContext mlirDialectGetContext(MlirDialect dialect) { + return wrap(unwrap(dialect)->getContext()); +} + +int mlirDialectIsNull(MlirDialect dialect) { + return unwrap(dialect) == nullptr; +} + +int mlirDialectEqual(MlirDialect dialect1, MlirDialect dialect2) { + return unwrap(dialect1) == unwrap(dialect2); +} + +MlirStringRef mlirDialectGetNamespace(MlirDialect dialect) { + return wrap(unwrap(dialect)->getNamespace()); +} /* ========================================================================== */ /* Location API. */ diff --git a/mlir/lib/CAPI/Standard/CMakeLists.txt b/mlir/lib/CAPI/Standard/CMakeLists.txt new file mode 100644 index 0000000000000..662841c2d2357 --- /dev/null +++ b/mlir/lib/CAPI/Standard/CMakeLists.txt @@ -0,0 +1,11 @@ +add_mlir_library(MLIRCAPIStandard + + StandardDialect.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir-c + + LINK_LIBS PUBLIC + MLIRCAPIIR + MLIRStandardOps + ) diff --git a/mlir/lib/CAPI/Standard/StandardDialect.cpp b/mlir/lib/CAPI/Standard/StandardDialect.cpp new file mode 100644 index 0000000000000..f78c9c916873e --- /dev/null +++ b/mlir/lib/CAPI/Standard/StandardDialect.cpp @@ -0,0 +1,25 @@ +//===- StandardDialect.cpp - C Interface for Standard dialect -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/StandardDialect.h" +#include "mlir-c/IR.h" +#include "mlir/CAPI/IR.h" +#include "mlir/CAPI/Support.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" + +void mlirContextRegisterStandardDialect(MlirContext context) { + unwrap(context)->getDialectRegistry().insert(); +} + +MlirDialect mlirContextLoadStandardDialect(MlirContext context) { + return wrap(unwrap(context)->getOrLoadDialect()); +} + +MlirStringRef mlirStandardDialectGetNamespace() { + return wrap(mlir::StandardOpsDialect::getDialectNamespace()); +} diff --git a/mlir/test/CAPI/CMakeLists.txt b/mlir/test/CAPI/CMakeLists.txt index 19deda5e3f11a..876d701d72118 100644 --- a/mlir/test/CAPI/CMakeLists.txt +++ b/mlir/test/CAPI/CMakeLists.txt @@ -13,4 +13,5 @@ target_link_libraries(mlir-capi-ir-test PRIVATE MLIRCAPIIR MLIRCAPIRegistration + MLIRCAPIStandard ${dialect_libs}) diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index 909929647a84a..ae60d56a22ed8 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -14,6 +14,7 @@ #include "mlir-c/AffineMap.h" #include "mlir-c/Registration.h" #include "mlir-c/StandardAttributes.h" +#include "mlir-c/StandardDialect.h" #include "mlir-c/StandardTypes.h" #include @@ -790,6 +791,42 @@ int printAffineMap(MlirContext ctx) { return 0; } +int registerOnlyStd() { + MlirContext ctx = mlirContextCreate(); + // The built-in dialect is always loaded. + if (mlirContextGetNumLoadedDialects(ctx) != 1) + return 1; + + MlirDialect std = + mlirContextGetOrLoadDialect(ctx, mlirStandardDialectGetNamespace()); + if (!mlirDialectIsNull(std)) + return 2; + + mlirContextRegisterStandardDialect(ctx); + if (mlirContextGetNumRegisteredDialects(ctx) != 1) + return 3; + if (mlirContextGetNumLoadedDialects(ctx) != 1) + return 4; + + std = mlirContextGetOrLoadDialect(ctx, mlirStandardDialectGetNamespace()); + if (mlirDialectIsNull(std)) + return 5; + if (mlirContextGetNumLoadedDialects(ctx) != 2) + return 6; + + MlirDialect alsoStd = mlirContextLoadStandardDialect(ctx); + if (!mlirDialectEqual(std, alsoStd)) + return 7; + + MlirStringRef stdNs = mlirDialectGetNamespace(std); + MlirStringRef alsoStdNs = mlirStandardDialectGetNamespace(); + if (stdNs.length != alsoStdNs.length || + strncmp(stdNs.data, alsoStdNs.data, stdNs.length)) + return 8; + + return 0; +} + int main() { MlirContext ctx = mlirContextCreate(); mlirRegisterAllDialects(ctx); @@ -935,6 +972,14 @@ int main() { errcode = printAffineMap(ctx); fprintf(stderr, "%d\n", errcode); + fprintf(stderr, "@registration\n"); + errcode = registerOnlyStd(); + fprintf(stderr, "%d\n", errcode); + // clang-format off + // CHECK-LABEL: @registration + // CHECK: 0 + // clang-format on + mlirContextDestroy(ctx); return 0; From 89a8a0c910422b9d363120769e2eebda03394b0f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 15:30:46 +0100 Subject: [PATCH 028/544] [InstCombine] Inherit exact flags on extended shifts in trunc (lshr (sext A), C) --> (ashr A, C) This was missed in D88475 --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 7 +++++-- llvm/test/Transforms/InstCombine/cast.ll | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index ca55c8f5a887b..fb885790d448e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -831,6 +831,8 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) { unsigned AWidth = A->getType()->getScalarSizeInBits(); unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth); + auto *OldSh = cast(Src); + bool IsExact = OldSh->isExact(); // If the shift is small enough, all zero bits created by the shift are // removed by the trunc. @@ -843,7 +845,8 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false); Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); - return BinaryOperator::CreateAShr(A, ShAmt); + return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt) + : BinaryOperator::CreateAShr(A, ShAmt); } // The types are mismatched, so create a cast after shifting: // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C) @@ -851,7 +854,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false); Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt); ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType()); - Value *Shift = Builder.CreateAShr(A, ShAmt); + Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact); return CastInst::CreateIntegerCast(Shift, DestTy, true); } } diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index db6b550f5faa6..97439606973c3 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1548,7 +1548,7 @@ define i8 @trunc_lshr_sext(i8 %A) { define i8 @trunc_lshr_sext_exact(i8 %A) { ; ALL-LABEL: @trunc_lshr_sext_exact( -; ALL-NEXT: [[D:%.*]] = ashr i8 [[A:%.*]], 6 +; ALL-NEXT: [[D:%.*]] = ashr exact i8 [[A:%.*]], 6 ; ALL-NEXT: ret i8 [[D]] ; %B = sext i8 %A to i32 @@ -1718,7 +1718,7 @@ define i8 @trunc_lshr_sext_wide_input(i16 %A) { define i8 @trunc_lshr_sext_wide_input_exact(i16 %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_exact( -; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[A:%.*]], 9 +; ALL-NEXT: [[TMP1:%.*]] = ashr exact i16 [[A:%.*]], 9 ; ALL-NEXT: [[D:%.*]] = trunc i16 [[TMP1]] to i8 ; ALL-NEXT: ret i8 [[D]] ; From 4fb303f340e2c55783f9b0f3ed33fa2c36360acf Mon Sep 17 00:00:00 2001 From: Tadeo Kondrak Date: Tue, 29 Sep 2020 16:29:22 +0200 Subject: [PATCH 029/544] [clangd] Improve PopulateSwitch tweak to work on non-empty switches Improve the recently-added PopulateSwitch tweak to work on non-empty switches. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D88434 --- .../clangd/refactor/tweaks/PopulateSwitch.cpp | 73 ++++++++++++--- .../clangd/unittests/TweakTests.cpp | 93 ++++++++++++++++++- 2 files changed, 146 insertions(+), 20 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp index e84a420f6218a..753e8b4df8265 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp @@ -33,12 +33,15 @@ #include "AST.h" #include "Selection.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/SmallSet.h" +#include #include namespace clang { @@ -52,18 +55,16 @@ class PopulateSwitch : public Tweak { Intent intent() const override { return Refactor; } private: - ASTContext *ASTCtx = nullptr; const DeclContext *DeclCtx = nullptr; const SwitchStmt *Switch = nullptr; const CompoundStmt *Body = nullptr; + const EnumType *EnumT = nullptr; const EnumDecl *EnumD = nullptr; }; REGISTER_TWEAK(PopulateSwitch) bool PopulateSwitch::prepare(const Selection &Sel) { - ASTCtx = &Sel.AST->getASTContext(); - const SelectionTree::Node *CA = Sel.ASTSelection.commonAncestor(); if (!CA) return false; @@ -94,11 +95,6 @@ bool PopulateSwitch::prepare(const Selection &Sel) { if (!Body) return false; - // Since we currently always insert all enumerators, don't suggest this tweak - // if the body is not empty. - if (!Body->body_empty()) - return false; - const Expr *Cond = Switch->getCond(); if (!Cond) return false; @@ -106,7 +102,7 @@ bool PopulateSwitch::prepare(const Selection &Sel) { // Ignore implicit casts, since enums implicitly cast to integer types. Cond = Cond->IgnoreParenImpCasts(); - const EnumType *EnumT = Cond->getType()->getAsAdjusted(); + EnumT = Cond->getType()->getAsAdjusted(); if (!EnumT) return false; @@ -114,21 +110,65 @@ bool PopulateSwitch::prepare(const Selection &Sel) { if (!EnumD) return false; - // If there aren't any enumerators, there's nothing to insert. - if (EnumD->enumerator_begin() == EnumD->enumerator_end()) - return false; + // We trigger if there are fewer cases than enum values (and no case covers + // multiple values). This guarantees we'll have at least one case to insert. + // We don't yet determine what the cases are, as that means evaluating + // expressions. + auto I = EnumD->enumerator_begin(); + auto E = EnumD->enumerator_end(); + + for (const SwitchCase *CaseList = Switch->getSwitchCaseList(); + CaseList && I != E; CaseList = CaseList->getNextSwitchCase(), I++) { + // Default likely intends to cover cases we'd insert. + if (isa(CaseList)) + return false; + + const CaseStmt *CS = cast(CaseList); + // Case statement covers multiple values, so just counting doesn't work. + if (CS->caseStmtIsGNURange()) + return false; - return true; + // Case expression is not a constant expression or is value-dependent, + // so we may not be able to work out which cases are covered. + const ConstantExpr *CE = dyn_cast(CS->getLHS()); + if (!CE || CE->isValueDependent()) + return false; + } + + // Only suggest tweak if we have more enumerators than cases. + return I != E; } Expected PopulateSwitch::apply(const Selection &Sel) { - const SourceManager &SM = ASTCtx->getSourceManager(); + ASTContext &Ctx = Sel.AST->getASTContext(); + + // Get the enum's integer width and signedness, for adjusting case literals. + unsigned EnumIntWidth = Ctx.getIntWidth(QualType(EnumT, 0)); + bool EnumIsSigned = EnumT->isSignedIntegerOrEnumerationType(); + + llvm::SmallSet ExistingEnumerators; + for (const SwitchCase *CaseList = Switch->getSwitchCaseList(); CaseList; + CaseList = CaseList->getNextSwitchCase()) { + const CaseStmt *CS = cast(CaseList); + assert(!CS->caseStmtIsGNURange()); + const ConstantExpr *CE = cast(CS->getLHS()); + assert(!CE->isValueDependent()); + llvm::APSInt Val = CE->getResultAsAPSInt(); + Val = Val.extOrTrunc(EnumIntWidth); + Val.setIsSigned(EnumIsSigned); + ExistingEnumerators.insert(Val); + } + SourceLocation Loc = Body->getRBracLoc(); + ASTContext &DeclASTCtx = DeclCtx->getParentASTContext(); std::string Text; for (EnumConstantDecl *Enumerator : EnumD->enumerators()) { + if (ExistingEnumerators.contains(Enumerator->getInitVal())) + continue; + Text += "case "; - Text += getQualification(*ASTCtx, DeclCtx, Loc, EnumD); + Text += getQualification(DeclASTCtx, DeclCtx, Loc, EnumD); if (EnumD->isScoped()) { Text += EnumD->getName(); Text += "::"; @@ -136,8 +176,11 @@ Expected PopulateSwitch::apply(const Selection &Sel) { Text += Enumerator->getName(); Text += ":"; } + + assert(!Text.empty() && "No enumerators to insert!"); Text += "break;"; + const SourceManager &SM = Ctx.getSourceManager(); return Effect::mainFileEdit( SM, tooling::Replacements(tooling::Replacement(SM, Loc, 0, Text))); } diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index e6fa01a52b3d7..7a217220c3841 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -2829,9 +2829,48 @@ TEST_F(PopulateSwitchTest, Test) { "unavailable", }, { - // Existing enumerators in switch + // All enumerators already in switch (unscoped) Function, - R""(enum Enum {A}; ^switch ((Enum)0) {case A:break;})"", + R""(enum Enum {A,B}; ^switch (A) {case A:break;case B:break;})"", + "unavailable", + }, + { + // All enumerators already in switch (scoped) + Function, + R""( + enum class Enum {A,B}; + ^switch (Enum::A) {case Enum::A:break;case Enum::B:break;} + )"", + "unavailable", + }, + { + // Default case in switch + Function, + R""( + enum class Enum {A,B}; + ^switch (Enum::A) {default:break;} + )"", + "unavailable", + }, + { + // GNU range in switch + Function, + R""( + enum class Enum {A,B}; + ^switch (Enum::A) {case Enum::A ... Enum::B:break;} + )"", + "unavailable", + }, + { + // Value dependent case expression + File, + R""( + enum class Enum {A,B}; + template + void function() { + ^switch (Enum::A) {case Value:break;} + } + )"", "unavailable", }, { @@ -2867,9 +2906,53 @@ TEST_F(PopulateSwitchTest, Test) { { // Scoped enumeration with multiple enumerators Function, - R""(enum class Enum {A,B}; ^switch (Enum::A) {})"", - R""(enum class Enum {A,B}; )"" - R""(switch (Enum::A) {case Enum::A:case Enum::B:break;})"", + R""( + enum class Enum {A,B}; + ^switch (Enum::A) {} + )"", + R""( + enum class Enum {A,B}; + switch (Enum::A) {case Enum::A:case Enum::B:break;} + )"", + }, + { + // Only filling in missing enumerators (unscoped) + Function, + R""( + enum Enum {A,B,C}; + ^switch (A) {case B:break;} + )"", + R""( + enum Enum {A,B,C}; + switch (A) {case B:break;case A:case C:break;} + )"", + }, + { + // Only filling in missing enumerators, + // even when using integer literals + Function, + R""( + enum Enum {A,B=1,C}; + ^switch (A) {case 1:break;} + )"", + R""( + enum Enum {A,B=1,C}; + switch (A) {case 1:break;case A:case C:break;} + )"", + }, + { + // Only filling in missing enumerators (scoped) + Function, + R""( + enum class Enum {A,B,C}; + ^switch (Enum::A) + {case Enum::B:break;} + )"", + R""( + enum class Enum {A,B,C}; + switch (Enum::A) + {case Enum::B:break;case Enum::A:case Enum::C:break;} + )"", }, { // Scoped enumerations in namespace From cc3b8e730e4e8783cc9d81a00fd235068fa522e5 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 29 Sep 2020 10:39:13 -0400 Subject: [PATCH 030/544] [mlir][openacc] Add wait operation This patch introduce the wait operation that represent the OpenACC wait directive. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D88125 --- .../mlir/Dialect/OpenACC/OpenACCOps.td | 32 ++++++++++++++++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 16 ++++++++ mlir/test/Dialect/OpenACC/invalid.mlir | 13 +++++++ mlir/test/Dialect/OpenACC/ops.mlir | 38 +++++++++++++++++++ 4 files changed, 99 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 862a35718f065..50ce7cbac668d 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -357,5 +357,37 @@ def OpenACC_UpdateOp : OpenACC_Op<"update", [AttrSizedOperandSegments]> { }]; } +//===----------------------------------------------------------------------===// +// 2.16.3. Wait Directive +//===----------------------------------------------------------------------===// + +def OpenACC_WaitOp : OpenACC_Op<"wait", [AttrSizedOperandSegments]> { + let summary = "wait operation"; + + let description = [{ + The "acc.wait" operation represents the OpenACC wait executable + directive. + + Example: + + ```mlir + acc.wait(%value1: index) + acc.wait() async(%async1: i32) + ``` + }]; + + let arguments = (ins Variadic:$waitOperands, + Optional:$asyncOperand, + Optional:$waitDevnum, + UnitAttr:$async, + Optional:$ifCond); + + let assemblyFormat = [{ + ( `(` $waitOperands^ `:` type($waitOperands) `)` )? + ( `async` `(` $asyncOperand^ `:` type($asyncOperand) `)` )? + ( `wait_devnum` `(` $waitDevnum^ `:` type($waitDevnum) `)` )? + ( `if` `(` $ifCond^ `)` )? attr-dict-with-keyword + }]; +} #endif // OPENACC_OPS diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 46df60532e1ad..f7d7ebbd71efc 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -676,5 +676,21 @@ static LogicalResult verify(acc::UpdateOp updateOp) { return success(); } +//===----------------------------------------------------------------------===// +// WaitOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(acc::WaitOp waitOp) { + // The async attribute represent the async clause without value. Therefore the + // attribute and operand cannot appear at the same time. + if (waitOp.asyncOperand() && waitOp.async()) + return waitOp.emitError("async attribute cannot appear with asyncOperand"); + + if (waitOp.waitDevnum() && waitOp.waitOperands().empty()) + return waitOp.emitError("wait_devnum cannot appear without waitOperands"); + + return success(); +} + #define GET_OP_CLASSES #include "mlir/Dialect/OpenACC/OpenACCOps.cpp.inc" diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index c694fc5361cf5..dbe8b5095316c 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -75,6 +75,7 @@ acc.data { } // ----- + // expected-error@+1 {{at least one value must be present in hostOperands or deviceOperands}} acc.update @@ -98,3 +99,15 @@ acc.update async(%cst: index) host(%value: memref<10xf32>) attributes {async} %value = alloc() : memref<10xf32> // expected-error@+1 {{wait attribute cannot appear with waitOperands}} acc.update wait(%cst: index) host(%value: memref<10xf32>) attributes {wait} + +// ----- + +%cst = constant 1 : index +// expected-error@+1 {{wait_devnum cannot appear without waitOperands}} +acc.wait wait_devnum(%cst: index) + +// ----- + +%cst = constant 1 : index +// expected-error@+1 {{async attribute cannot appear with asyncOperand}} +acc.wait async(%cst: index) attributes {async} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index c383d067f285a..8878acba961fa 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -554,3 +554,41 @@ func @testupdateop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32> // CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) attributes {async} // CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) attributes {wait} // CHECK: acc.update host([[ARGA]] : memref<10xf32>) device([[ARGB]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) attributes {ifPresent} + +// ----- + +%i64Value = constant 1 : i64 +%i32Value = constant 1 : i32 +%idxValue = constant 1 : index +%ifCond = constant true +acc.wait +acc.wait(%i64Value: i64) +acc.wait(%i32Value: i32) +acc.wait(%idxValue: index) +acc.wait(%i32Value, %idxValue : i32, index) +acc.wait async(%i64Value: i64) +acc.wait async(%i32Value: i32) +acc.wait async(%idxValue: index) +acc.wait(%i32Value: i32) async(%idxValue: index) +acc.wait(%i64Value: i64) wait_devnum(%i32Value: i32) +acc.wait attributes {async} +acc.wait(%i64Value: i64) async(%idxValue: index) wait_devnum(%i32Value: i32) +acc.wait if(%ifCond) + +// CHECK: [[I64VALUE:%.*]] = constant 1 : i64 +// CHECK: [[I32VALUE:%.*]] = constant 1 : i32 +// CHECK: [[IDXVALUE:%.*]] = constant 1 : index +// CHECK: [[IFCOND:%.*]] = constant true +// CHECK: acc.wait +// CHECK: acc.wait([[I64VALUE]] : i64) +// CHECK: acc.wait([[I32VALUE]] : i32) +// CHECK: acc.wait([[IDXVALUE]] : index) +// CHECK: acc.wait([[I32VALUE]], [[IDXVALUE]] : i32, index) +// CHECK: acc.wait async([[I64VALUE]] : i64) +// CHECK: acc.wait async([[I32VALUE]] : i32) +// CHECK: acc.wait async([[IDXVALUE]] : index) +// CHECK: acc.wait([[I32VALUE]] : i32) async([[IDXVALUE]] : index) +// CHECK: acc.wait([[I64VALUE]] : i64) wait_devnum([[I32VALUE]] : i32) +// CHECK: acc.wait attributes {async} +// CHECK: acc.wait([[I64VALUE]] : i64) async([[IDXVALUE]] : index) wait_devnum([[I32VALUE]] : i32) +// CHECK: acc.wait if([[IFCOND]]) From 7a55989dc4305e66734bdd84a9f9eefeb9fe64bd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 15:49:43 +0100 Subject: [PATCH 031/544] [InstCombine] Add some basic trunc(lshr(zext(x),c)) tests Copied from the sext equivalents --- llvm/test/Transforms/InstCombine/cast.ll | 84 ++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 97439606973c3..c5f18b4c625e8 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -2012,6 +2012,90 @@ define i8 @trunc_lshr_overshift2_sext_uses3(i8 %A) { ret i8 %D } +define i8 @trunc_lshr_zext(i8 %A) { +; ALL-LABEL: @trunc_lshr_zext( +; ALL-NEXT: [[TMP1:%.*]] = lshr i8 [[A:%.*]], 6 +; ALL-NEXT: ret i8 [[TMP1]] +; + %B = zext i8 %A to i32 + %C = lshr i32 %B, 6 + %D = trunc i32 %C to i8 + ret i8 %D +} + +define i8 @trunc_lshr_zext_exact(i8 %A) { +; ALL-LABEL: @trunc_lshr_zext_exact( +; ALL-NEXT: [[TMP1:%.*]] = lshr i8 [[A:%.*]], 6 +; ALL-NEXT: ret i8 [[TMP1]] +; + %B = zext i8 %A to i32 + %C = lshr exact i32 %B, 6 + %D = trunc i32 %C to i8 + ret i8 %D +} + +define <2 x i8> @trunc_lshr_zext_uniform(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_uniform( +; ALL-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], +; ALL-NEXT: ret <2 x i8> [[TMP1]] +; + %B = zext <2 x i8> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + +define <2 x i8> @trunc_lshr_zext_uniform_undef(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_uniform_undef( +; ALL-NEXT: [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; ALL-NEXT: ret <2 x i8> [[D]] +; + %B = zext <2 x i8> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + +define <2 x i8> @trunc_lshr_zext_nonuniform(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_nonuniform( +; ALL-NEXT: [[C:%.*]] = lshr <2 x i8> [[A:%.*]], +; ALL-NEXT: ret <2 x i8> [[C]] +; + %B = zext <2 x i8> %A to <2 x i32> + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + +define <3 x i8> @trunc_lshr_zext_nonuniform_undef(<3 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_nonuniform_undef( +; ALL-NEXT: [[B:%.*]] = zext <3 x i8> [[A:%.*]] to <3 x i32> +; ALL-NEXT: [[C:%.*]] = lshr <3 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = trunc <3 x i32> [[C]] to <3 x i8> +; ALL-NEXT: ret <3 x i8> [[D]] +; + %B = zext <3 x i8> %A to <3 x i32> + %C = lshr <3 x i32> %B, + %D = trunc <3 x i32> %C to <3 x i8> + ret <3 x i8> %D +} + +define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_uses1( +; ALL-NEXT: [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32> +; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) +; ALL-NEXT: [[C:%.*]] = lshr <2 x i8> [[A]], +; ALL-NEXT: ret <2 x i8> [[C]] +; + %B = zext <2 x i8> %A to <2 x i32> + call void @use_v2i32(<2 x i32> %B) + %C = lshr <2 x i32> %B, + %D = trunc <2 x i32> %C to <2 x i8> + ret <2 x i8> %D +} + ; The following four tests sext + lshr + trunc patterns. ; PR33078 From 51323fe2b89e976dc53356299d5cc3daeaaee5a7 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 29 Sep 2020 10:58:46 -0400 Subject: [PATCH 032/544] [mlir][openacc] Add init operation This patch introduces the init operation that represents the init executable directive from the OpenACC 3.0 specifications. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D88254 --- .../mlir/Dialect/OpenACC/OpenACCOps.td | 30 +++++++++++++++++++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 13 ++++++++ mlir/test/Dialect/OpenACC/invalid.mlir | 16 ++++++++++ mlir/test/Dialect/OpenACC/ops.mlir | 28 +++++++++++++++++ 4 files changed, 87 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 50ce7cbac668d..0d8efcc456b44 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -318,6 +318,36 @@ def OpenACC_YieldOp : OpenACC_Op<"yield", [Terminator, let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; } +//===----------------------------------------------------------------------===// +// 2.14.1. Init Directive +//===----------------------------------------------------------------------===// + +def OpenACC_InitOp : OpenACC_Op<"init", [AttrSizedOperandSegments]> { + let summary = "init operation"; + + let description = [{ + The "acc.init" operation represents the OpenACC init executable + directive. + + Example: + + ```mlir + acc.init + acc.init device_num(%dev1 : i32) + ``` + }]; + + let arguments = (ins Variadic:$deviceTypeOperands, + Optional:$deviceNumOperand, + Optional:$ifCond); + + let assemblyFormat = [{ + ( `device_type` `(` $deviceTypeOperands^ `:` type($deviceTypeOperands) `)` )? + ( `device_num` `(` $deviceNumOperand^ `:` type($deviceNumOperand) `)` )? + ( `if` `(` $ifCond^ `)` )? attr-dict-with-keyword + }]; +} + //===----------------------------------------------------------------------===// // 2.14.4. Update Directive //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index f7d7ebbd71efc..515f5a9e28e8f 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -648,6 +648,19 @@ static LogicalResult verify(acc::DataOp dataOp) { return success(); } +//===----------------------------------------------------------------------===// +// InitOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(acc::InitOp initOp) { + Operation *currOp = initOp; + while ((currOp = currOp->getParentOp())) { + if (isa(currOp) || isa(currOp)) + return initOp.emitOpError("cannot be nested in a compute operation"); + } + return success(); +} + //===----------------------------------------------------------------------===// // UpdateOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index dbe8b5095316c..7a8a07f78f9a5 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -111,3 +111,19 @@ acc.wait wait_devnum(%cst: index) %cst = constant 1 : index // expected-error@+1 {{async attribute cannot appear with asyncOperand}} acc.wait async(%cst: index) attributes {async} + +// ----- + +acc.parallel { +// expected-error@+1 {{'acc.init' op cannot be nested in a compute operation}} + acc.init + acc.yield +} + +// ----- + +acc.loop { +// expected-error@+1 {{'acc.init' op cannot be nested in a compute operation}} + acc.init + acc.yield +} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 8878acba961fa..a4dec5dcf480b 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -592,3 +592,31 @@ acc.wait if(%ifCond) // CHECK: acc.wait attributes {async} // CHECK: acc.wait([[I64VALUE]] : i64) async([[IDXVALUE]] : index) wait_devnum([[I32VALUE]] : i32) // CHECK: acc.wait if([[IFCOND]]) + +// ----- + +%i64Value = constant 1 : i64 +%i32Value = constant 1 : i32 +%i32Value2 = constant 2 : i32 +%idxValue = constant 1 : index +%ifCond = constant true +acc.init +acc.init device_type(%i32Value : i32) +acc.init device_type(%i32Value, %i32Value2 : i32, i32) +acc.init device_num(%i64Value : i64) +acc.init device_num(%i32Value : i32) +acc.init device_num(%idxValue : index) +acc.init if(%ifCond) + +// CHECK: [[I64VALUE:%.*]] = constant 1 : i64 +// CHECK: [[I32VALUE:%.*]] = constant 1 : i32 +// CHECK: [[I32VALUE2:%.*]] = constant 2 : i32 +// CHECK: [[IDXVALUE:%.*]] = constant 1 : index +// CHECK: [[IFCOND:%.*]] = constant true +// CHECK: acc.init +// CHECK: acc.init device_type([[I32VALUE]] : i32) +// CHECK: acc.init device_type([[I32VALUE]], [[I32VALUE2]] : i32, i32) +// CHECK: acc.init device_num([[I64VALUE]] : i64) +// CHECK: acc.init device_num([[I32VALUE]] : i32) +// CHECK: acc.init device_num([[IDXVALUE]] : index) +// CHECK: acc.init if([[IFCOND]]) From ee34d9b210cb5a6d14fe069e2e2ae75b0548dba9 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 29 Sep 2020 11:02:03 -0400 Subject: [PATCH 033/544] [InstCombine] use redirect of input file in regression tests; NFC This is a repeat of 1880092722 from 2009. We should have less risk of hitting bugs at this point because we auto-generate positive CHECK lines only, but this makes things consistent. Copying the original commit msg: "Change tests from "opt %s" to "opt < %s" so that opt doesn't see the input filename so that opt doesn't print the input filename in the output so that grep lines in the tests don't unintentionally match strings in the input filename." --- llvm/test/Transforms/InstCombine/cmp-x-vs-neg-x.ll | 2 +- ...tional-variable-length-signext-after-high-bit-extract.ll | 2 +- .../test/Transforms/InstCombine/do-not-clone-dbg-declare.ll | 2 +- .../Transforms/InstCombine/high-bit-signmask-with-trunc.ll | 2 +- llvm/test/Transforms/InstCombine/high-bit-signmask.ll | 2 +- llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll | 2 +- llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll | 6 +++--- .../Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll | 2 +- .../Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll | 2 +- .../InstCombine/merging-multiple-stores-into-successor.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-a.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-b.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-c.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-d.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-e.ll | 2 +- ...partally-redundant-left-shift-input-masking-variant-a.ll | 2 +- ...partally-redundant-left-shift-input-masking-variant-b.ll | 2 +- ...partally-redundant-left-shift-input-masking-variant-c.ll | 2 +- ...partally-redundant-left-shift-input-masking-variant-d.ll | 2 +- ...partally-redundant-left-shift-input-masking-variant-e.ll | 2 +- llvm/test/Transforms/InstCombine/pr21891.ll | 2 +- llvm/test/Transforms/InstCombine/pr38897.ll | 2 +- llvm/test/Transforms/InstCombine/pr38915.ll | 2 +- llvm/test/Transforms/InstCombine/pr41164.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-a.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-b.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-c.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-d.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-e.ll | 2 +- ...t-left-shift-input-masking-after-truncation-variant-f.ll | 2 +- .../redundant-left-shift-input-masking-variant-a.ll | 2 +- .../redundant-left-shift-input-masking-variant-b.ll | 2 +- .../redundant-left-shift-input-masking-variant-c.ll | 2 +- .../redundant-left-shift-input-masking-variant-d.ll | 2 +- .../redundant-left-shift-input-masking-variant-e.ll | 2 +- .../redundant-left-shift-input-masking-variant-f.ll | 2 +- .../InstCombine/redundant-right-shift-input-masking.ll | 2 +- ...esult-of-add-of-negative-is-non-zero-and-no-underflow.ll | 2 +- ...-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll | 2 +- .../result-of-usub-is-non-zero-and-no-overflow.ll | 2 +- .../InstCombine/sdiv-exact-by-negative-power-of-two.ll | 2 +- .../Transforms/InstCombine/sdiv-exact-by-power-of-two.ll | 2 +- .../sdiv-of-non-negative-by-negative-power-of-two.ll | 2 +- ...-amount-reassociation-in-bittest-with-truncation-lshr.ll | 2 +- ...t-amount-reassociation-in-bittest-with-truncation-shl.ll | 2 +- .../InstCombine/shift-amount-reassociation-in-bittest.ll | 2 +- .../shift-amount-reassociation-with-truncation-ashr.ll | 2 +- .../shift-amount-reassociation-with-truncation-lshr.ll | 2 +- .../shift-amount-reassociation-with-truncation-shl.ll | 2 +- .../Transforms/InstCombine/shift-amount-reassociation.ll | 2 +- llvm/test/Transforms/InstCombine/shift-by-signext.ll | 2 +- .../Transforms/InstCombine/shift-direction-in-bit-test.ll | 2 +- .../test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll | 2 +- .../Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll | 2 +- .../Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll | 2 +- .../Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll | 2 +- llvm/test/Transforms/InstCombine/srem-via-sdiv-mul-sub.ll | 2 +- ...ict-sub-underflow-check-to-comparison-of-sub-operands.ll | 2 +- llvm/test/Transforms/InstCombine/sub-of-negatible.ll | 2 +- .../unsigned-add-lack-of-overflow-check-via-add.ll | 2 +- .../unsigned-add-lack-of-overflow-check-via-xor.ll | 2 +- .../InstCombine/unsigned-add-lack-of-overflow-check.ll | 2 +- .../InstCombine/unsigned-add-overflow-check-via-add.ll | 2 +- .../InstCombine/unsigned-add-overflow-check-via-xor.ll | 2 +- .../Transforms/InstCombine/unsigned-add-overflow-check.ll | 2 +- .../unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll | 2 +- ...signed-mul-lack-of-overflow-check-via-udiv-of-allones.ll | 2 +- .../InstCombine/unsigned-mul-overflow-check-via-mul-udiv.ll | 2 +- .../unsigned-mul-overflow-check-via-udiv-of-allones.ll | 2 +- .../InstCombine/unsigned-sub-lack-of-overflow-check.ll | 2 +- .../Transforms/InstCombine/unsigned-sub-overflow-check.ll | 2 +- llvm/test/Transforms/InstCombine/urem-via-udiv-mul-sub.ll | 2 +- .../variable-signext-of-variable-high-bit-extraction.ll | 2 +- 73 files changed, 75 insertions(+), 75 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/cmp-x-vs-neg-x.ll b/llvm/test/Transforms/InstCombine/cmp-x-vs-neg-x.ll index cd5446112cdff..f5268ecb433dd 100644 --- a/llvm/test/Transforms/InstCombine/cmp-x-vs-neg-x.ll +++ b/llvm/test/Transforms/InstCombine/cmp-x-vs-neg-x.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s declare i8 @gen8() declare void @use8(i8) diff --git a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll index cb4d38d6641ea..d19e860a2c1da 100644 --- a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll +++ b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we extract (via lshr) some high bits, and then perform their sign-extension ; conditionally depending on whether the extracted value is negative or not diff --git a/llvm/test/Transforms/InstCombine/do-not-clone-dbg-declare.ll b/llvm/test/Transforms/InstCombine/do-not-clone-dbg-declare.ll index 0d9a0aa324605..bef6838393f32 100644 --- a/llvm/test/Transforms/InstCombine/do-not-clone-dbg-declare.ll +++ b/llvm/test/Transforms/InstCombine/do-not-clone-dbg-declare.ll @@ -1,5 +1,5 @@ -; RUN: opt %s -instcombine -verify -S -o - | FileCheck %s +; RUN: opt < %s -instcombine -verify -S -o - | FileCheck %s ; Hand-reduced from this example. ; -g -O -mllvm -disable-llvm-optzns -gno-column-info diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll index b9bce627d2abd..7c98a2725a339 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @t0(i64 %x) { ; CHECK-LABEL: @t0( diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll index 18a87273c0216..8621ebcf14cae 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s define i64 @t0(i64 %x) { ; CHECK-LABEL: @t0( diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll index 4d85c095c4440..5a0b3e34829d3 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s define i1 @icmp_ugt_32(i64) { ; CHECK-LABEL: @icmp_ugt_32( diff --git a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll index f7ae19657297f..4a80a885be399 100644 --- a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll +++ b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll @@ -1,6 +1,6 @@ -; RUN: opt %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info -; RUN: opt %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s -; RUN: opt %s -passes='print' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s +; RUN: opt < %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info +; RUN: opt < %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s +; RUN: opt < %s -passes='print' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s ; Demonstrate that Predicate Canonicalization (InstCombine) does not invalidate PostDomTree ; if the basic block is post-dom unreachable. diff --git a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll index d2b72cb68bda9..b81796fae3c5f 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; For pattern ((X l>> Y) & ~C) ==/!= 0; when C+1 is power of 2 ; it may be optimal to fold into (X l>> Y) = C+1 diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll index 34cb558818d0b..afe96c43f258f 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; For pattern ((X l>> Y) & signbit) ==/!= 0 ; it may be optimal to fold into (X l>> Y) >=/< 0 diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll index be00822834d82..608109ac56335 100644 --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s +; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s @var_7 = external global i8, align 1 @var_1 = external global i32, align 4 diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll index bc84e7a61afcf..89c16a0949e83 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll index 65bc2f244e0ab..8aef637c6a74d 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll index 9203550b7db33..61f25e6ca0b18 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index 5c08183b999a8..077bb8296f3e9 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll index 1bfd95587e937..961ea5e48416e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, lshr then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll index 205dcfbf22cbf..41a71aa98f407 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index 4d3d5432375c7..787135229148e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index fc964f8725b5e..c0959d9e1ac6d 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 83c0df1d83e44..5e0f0be2b1ade 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll index 200d414d82587..2e335f0083c17 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/pr21891.ll b/llvm/test/Transforms/InstCombine/pr21891.ll index 8194976b6233c..1473838252ce7 100644 --- a/llvm/test/Transforms/InstCombine/pr21891.ll +++ b/llvm/test/Transforms/InstCombine/pr21891.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -instcombine +; RUN: opt < %s -instcombine define i32 @f(i32 %theNumber) { entry: diff --git a/llvm/test/Transforms/InstCombine/pr38897.ll b/llvm/test/Transforms/InstCombine/pr38897.ll index 0b10f3510e5f6..1eb2cb9a4677c 100644 --- a/llvm/test/Transforms/InstCombine/pr38897.ll +++ b/llvm/test/Transforms/InstCombine/pr38897.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @sharpening(i32 %b340, i1 %c, i1 %d, i32 %e, i32 %f, i32 %g, i32 %h) { ; CHECK-LABEL: @sharpening( diff --git a/llvm/test/Transforms/InstCombine/pr38915.ll b/llvm/test/Transforms/InstCombine/pr38915.ll index f259cdefe9c16..22adb9eff7752 100644 --- a/llvm/test/Transforms/InstCombine/pr38915.ll +++ b/llvm/test/Transforms/InstCombine/pr38915.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @PR38915(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @PR38915( diff --git a/llvm/test/Transforms/InstCombine/pr41164.ll b/llvm/test/Transforms/InstCombine/pr41164.ll index 372debab8ecfe..8e98e2aa57e8c 100644 --- a/llvm/test/Transforms/InstCombine/pr41164.ll +++ b/llvm/test/Transforms/InstCombine/pr41164.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s @wyhash64_x = global i64 0, align 8 diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll index d11a5c3221769..8aea13b5d5452 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll index 960f2c9e8abad..e052560df836e 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll index 3cc3e1028ae10..a9efc9e0cf5f5 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll index 04a0966f14824..1719bae187cbb 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll index 64e7c6bb670c3..2f7e64bd5735a 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, lshr then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll index aaaeb43fd496f..546c342615a9d 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, ashr then performs ; left-shift of those bits, we can combine those two shifts into a shift+mask. diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll index e7145fcccd7d7..3a9e994dffaf7 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, if none of the bits that are left after the final diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll index 94e786d38b088..b0e23bcf60704 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, if none of the bits that are left after the final diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll index fb480e3f3ecd7..1ef139bd563e8 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, if none of the bits that are left after the final diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll index 0871e82dd28de..a0d99fb2d0309 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, if none of the bits that are left after the final diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll index f3682ef8cdde8..c287a32bbf102 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, if none of the bits that are left after the final diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll index 3e2bb330ab6c2..386a5d5aa7343 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have some pattern that leaves only some low bits set, and then performs ; left-shift of those bits, if none of the bits that are left after the final diff --git a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll index 0392026752504..5abdebe088c72 100644 --- a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll +++ b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have: ; (data & (-1 << nbits)) outer>> nbits diff --git a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll index 5b48b5e30ff45..2b4686004abc2 100644 --- a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-is-non-zero-and-no-underflow.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s declare void @use8(i8) diff --git a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll index b8a29e4fab25e..7b29bcdd25315 100644 --- a/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-add-of-negative-or-zero-is-non-zero-and-no-underflow.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s declare void @use8(i8) diff --git a/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll b/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll index d46da5ec863e4..ae70b9259f0b9 100644 --- a/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Here we subtract two values, check that subtraction did not overflow AND ; that the result is non-zero. This can be simplified just to a comparison diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll index 78858421024ea..e19b7ad9c3db7 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; x s/EXACT (-1 << y) diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll index 40e8c9634700c..282584b9888e1 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; x s/EXACT (1 << y) diff --git a/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll index f9dd32bfc612d..f8c3dff195880 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; x s/ (-1 << y) diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll index 118186f98a9e1..0180828a93554 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0 diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll index 0c3ee460948c0..fd5a3a3383336 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0 diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll index 97506e193e602..bcf5d46ad974c 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0 diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll index c4e337548d12a..944d443733ab6 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; (trunc (iSrc x a>> Q) to iDst) a>> K diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll index edb34fbb9e96a..1d88d4d1625dd 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; (trunc (iSrc x l>> Q) to iDst) l>> K diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll index 0808b9a3763ef..59fef0a1b17f5 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; (trunc (x << Q) to iDst) << K diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll index 96461691e70b3..8c757db6080aa 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Given pattern: ; (x shiftopcode Q) shiftopcode K diff --git a/llvm/test/Transforms/InstCombine/shift-by-signext.ll b/llvm/test/Transforms/InstCombine/shift-by-signext.ll index b049dfac6f855..a24fd04f2a663 100644 --- a/llvm/test/Transforms/InstCombine/shift-by-signext.ll +++ b/llvm/test/Transforms/InstCombine/shift-by-signext.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; If we have a shift by sign-extended value, we can replace sign-extension ; with zero-extension. diff --git a/llvm/test/Transforms/InstCombine/shift-direction-in-bit-test.ll b/llvm/test/Transforms/InstCombine/shift-direction-in-bit-test.ll index 3c439192bd386..2969edf505a19 100644 --- a/llvm/test/Transforms/InstCombine/shift-direction-in-bit-test.ll +++ b/llvm/test/Transforms/InstCombine/shift-direction-in-bit-test.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; The pattern: ; icmp eq/ne (and (X shift Y), Z), 0 diff --git a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll index 772ed3be775fb..567c369c3acf0 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; For pattern ((X << Y) & ~C) ==/!= 0; when C+1 is power of 2 ; it may be optimal to fold into (X << Y) = C+1 diff --git a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll index 9fb17cdb97faa..c1a50a69a9a85 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; For pattern ((X << Y) & signbit) ==/!= 0 ; it may be optimal to fold into (X << Y) >=/< 0 diff --git a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll index 72c2132970804..40089f6011585 100644 --- a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; For pattern (X & (signbit l>> Y)) ==/!= 0 ; it may be optimal to fold into (X << Y) >=/< 0 diff --git a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll index 674d0c28d0185..1d76fa5ffa37c 100644 --- a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; For pattern (X & (signbit << Y)) ==/!= 0 ; it may be optimal to fold into (X l>> Y) >=/< 0 diff --git a/llvm/test/Transforms/InstCombine/srem-via-sdiv-mul-sub.ll b/llvm/test/Transforms/InstCombine/srem-via-sdiv-mul-sub.ll index 8690faa7a614c..502b2225b4670 100644 --- a/llvm/test/Transforms/InstCombine/srem-via-sdiv-mul-sub.ll +++ b/llvm/test/Transforms/InstCombine/srem-via-sdiv-mul-sub.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; x - ((x / y) * y) diff --git a/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll b/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll index 9abb588a4ee0b..9ebad37f2ad51 100644 --- a/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll +++ b/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s declare void @llvm.assume(i1) declare i8 @gen8() diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index 0755ebfff1621..92d1ac225a01b 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s declare void @use4(i4) declare void @use8(i8) diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-add.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-add.ll index b39f2bb8becc8..47fcabcf6ba73 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-add.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-add.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Should fold ; (%x + %y) u>= %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll index f48bd6c9abc6e..71ebd74dda44b 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Should fold ; (%y ^ -1) u>= %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll index 29c195f3a6c30..0b340059d92a8 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Should fold ; (%x + %y) u>= %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-add.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-add.ll index a614142ef4504..86eeea93fc15a 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-add.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-add.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Should fold ; (%x + %y) u< %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll index e8fa5c84d1496..f6b2edbb3a9a7 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Should fold ; (%y ^ -1) u< %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll index d1ab9ae41d617..152ed89c6f37c 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Should fold ; (%x + %y) u< %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll index faa4bedb2905c..0024ad0530e74 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; ((%x * %y) u/ %x) == %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll index cfc7a01d76432..38e1b10bc3eab 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; (-1 u/ %x) u>= %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-mul-udiv.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-mul-udiv.ll index a1c09f567873f..8f9a8461d717c 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-mul-udiv.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-mul-udiv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; ((%x * %y) u/ %x) != %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll index f4ab3c23b6990..668a44a7c27fc 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; (-1 u/ %x) u< %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll index d048841c9850b..4f7961995a5fd 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; (%x - %y) u<= %x diff --git a/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll index 124150cacf03e..c27421cece01e 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; (%x - %y) u> %x diff --git a/llvm/test/Transforms/InstCombine/urem-via-udiv-mul-sub.ll b/llvm/test/Transforms/InstCombine/urem-via-udiv-mul-sub.ll index 0e28a0d72c3df..5c80525b54583 100644 --- a/llvm/test/Transforms/InstCombine/urem-via-udiv-mul-sub.ll +++ b/llvm/test/Transforms/InstCombine/urem-via-udiv-mul-sub.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s ; Fold ; x - ((x / y) * y) diff --git a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll index a5f38735a3738..1c135182e55f8 100644 --- a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll +++ b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s declare void @use16(i16) declare void @use32(i32) From 01c95f79424d1083a6c06ed28c98c0a647b85711 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Tue, 29 Sep 2020 10:12:58 -0500 Subject: [PATCH 034/544] [SVE] Fix typo in CHECK lines for sve-fixed-length-int-reduce.ll --- .../AArch64/sve-fixed-length-int-reduce.ll | 250 +++++++++--------- 1 file changed, 125 insertions(+), 125 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll index 6989f39f51972..8e3ef87908f73 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll @@ -134,11 +134,11 @@ define i16 @uaddv_v32i16(<32 x i16>* %a) #0 { define i16 @uaddv_v64i16(<64 x i16>* %a) #0 { ; CHECK-LABEL: uaddv_v64i16: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].h, vl64 -; VBITS_GE_1048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].h -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <64 x i16>, <64 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %op) ret i16 %res @@ -200,11 +200,11 @@ define i32 @uaddv_v16i32(<16 x i32>* %a) #0 { define i32 @uaddv_v32i32(<32 x i32>* %a) #0 { ; CHECK-LABEL: uaddv_v32i32: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].s, vl32 -; VBITS_GE_1048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <32 x i32>, <32 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %op) ret i32 %res @@ -212,11 +212,11 @@ define i32 @uaddv_v32i32(<32 x i32>* %a) #0 { define i32 @uaddv_v64i32(<64 x i32>* %a) #0 { ; CHECK-LABEL: uaddv_v64i32: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].s, vl64 -; VBITS_GE_2096-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_2086-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <64 x i32>, <64 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.add.v64i32(<64 x i32> %op) ret i32 %res @@ -266,11 +266,11 @@ define i64 @uaddv_v8i64(<8 x i64>* %a) #0 { define i64 @uaddv_v16i64(<16 x i64>* %a) #0 { ; CHECK-LABEL: uaddv_v16i64: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].d, vl16 -; VBITS_GE_1048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %op) ret i64 %res @@ -278,11 +278,11 @@ define i64 @uaddv_v16i64(<16 x i64>* %a) #0 { define i64 @uaddv_v32i64(<32 x i64>* %a) #0 { ; CHECK-LABEL: uaddv_v32i64: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].d, vl32 -; VBITS_GE_2096-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_2096-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <32 x i64>, <32 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.add.v32i64(<32 x i64> %op) ret i64 %res @@ -402,11 +402,11 @@ define i16 @smaxv_v32i16(<32 x i16>* %a) #0 { define i16 @smaxv_v64i16(<64 x i16>* %a) #0 { ; CHECK-LABEL: smaxv_v64i16: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].h, vl64 -; VBITS_GE_1048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h -; VBITS_GE_1048-NEXT: fmov w0, s[[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <64 x i16>, <64 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %op) ret i16 %res @@ -468,11 +468,11 @@ define i32 @smaxv_v16i32(<16 x i32>* %a) #0 { define i32 @smaxv_v32i32(<32 x i32>* %a) #0 { ; CHECK-LABEL: smaxv_v32i32: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].s, vl32 -; VBITS_GE_1048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <32 x i32>, <32 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %op) ret i32 %res @@ -480,11 +480,11 @@ define i32 @smaxv_v32i32(<32 x i32>* %a) #0 { define i32 @smaxv_v64i32(<64 x i32>* %a) #0 { ; CHECK-LABEL: smaxv_v64i32: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].s, vl64 -; VBITS_GE_2096-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_2086-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <64 x i32>, <64 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.smax.v64i32(<64 x i32> %op) ret i32 %res @@ -536,11 +536,11 @@ define i64 @smaxv_v8i64(<8 x i64>* %a) #0 { define i64 @smaxv_v16i64(<16 x i64>* %a) #0 { ; CHECK-LABEL: smaxv_v16i64: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].d, vl16 -; VBITS_GE_1048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %op) ret i64 %res @@ -548,11 +548,11 @@ define i64 @smaxv_v16i64(<16 x i64>* %a) #0 { define i64 @smaxv_v32i64(<32 x i64>* %a) #0 { ; CHECK-LABEL: smaxv_v32i64: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].d, vl32 -; VBITS_GE_2096-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_2096-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <32 x i64>, <32 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.smax.v32i64(<32 x i64> %op) ret i64 %res @@ -672,11 +672,11 @@ define i16 @sminv_v32i16(<32 x i16>* %a) #0 { define i16 @sminv_v64i16(<64 x i16>* %a) #0 { ; CHECK-LABEL: sminv_v64i16: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].h, vl64 -; VBITS_GE_1048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: sminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h -; VBITS_GE_1048-NEXT: fmov w0, s[[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: sminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <64 x i16>, <64 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %op) ret i16 %res @@ -738,11 +738,11 @@ define i32 @sminv_v16i32(<16 x i32>* %a) #0 { define i32 @sminv_v32i32(<32 x i32>* %a) #0 { ; CHECK-LABEL: sminv_v32i32: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].s, vl32 -; VBITS_GE_1048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: sminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_1048-NEXT: fmov w0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: sminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <32 x i32>, <32 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %op) ret i32 %res @@ -750,11 +750,11 @@ define i32 @sminv_v32i32(<32 x i32>* %a) #0 { define i32 @sminv_v64i32(<64 x i32>* %a) #0 { ; CHECK-LABEL: sminv_v64i32: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].s, vl64 -; VBITS_GE_2096-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: sminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_2086-NEXT: fmov w0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: sminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <64 x i32>, <64 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.smin.v64i32(<64 x i32> %op) ret i32 %res @@ -806,11 +806,11 @@ define i64 @sminv_v8i64(<8 x i64>* %a) #0 { define i64 @sminv_v16i64(<16 x i64>* %a) #0 { ; CHECK-LABEL: sminv_v16i64: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].d, vl16 -; VBITS_GE_1048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %op) ret i64 %res @@ -818,11 +818,11 @@ define i64 @sminv_v16i64(<16 x i64>* %a) #0 { define i64 @sminv_v32i64(<32 x i64>* %a) #0 { ; CHECK-LABEL: sminv_v32i64: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].d, vl32 -; VBITS_GE_2096-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_2096-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <32 x i64>, <32 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.smin.v32i64(<32 x i64> %op) ret i64 %res @@ -942,11 +942,11 @@ define i16 @umaxv_v32i16(<32 x i16>* %a) #0 { define i16 @umaxv_v64i16(<64 x i16>* %a) #0 { ; CHECK-LABEL: umaxv_v64i16: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].h, vl64 -; VBITS_GE_1048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h -; VBITS_GE_1048-NEXT: fmov w0, s[[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <64 x i16>, <64 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %op) ret i16 %res @@ -1008,11 +1008,11 @@ define i32 @umaxv_v16i32(<16 x i32>* %a) #0 { define i32 @umaxv_v32i32(<32 x i32>* %a) #0 { ; CHECK-LABEL: umaxv_v32i32: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].s, vl32 -; VBITS_GE_1048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <32 x i32>, <32 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %op) ret i32 %res @@ -1020,11 +1020,11 @@ define i32 @umaxv_v32i32(<32 x i32>* %a) #0 { define i32 @umaxv_v64i32(<64 x i32>* %a) #0 { ; CHECK-LABEL: umaxv_v64i32: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].s, vl64 -; VBITS_GE_2096-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_2086-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <64 x i32>, <64 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.umax.v64i32(<64 x i32> %op) ret i32 %res @@ -1076,11 +1076,11 @@ define i64 @umaxv_v8i64(<8 x i64>* %a) #0 { define i64 @umaxv_v16i64(<16 x i64>* %a) #0 { ; CHECK-LABEL: umaxv_v16i64: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].d, vl16 -; VBITS_GE_1048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %op) ret i64 %res @@ -1088,11 +1088,11 @@ define i64 @umaxv_v16i64(<16 x i64>* %a) #0 { define i64 @umaxv_v32i64(<32 x i64>* %a) #0 { ; CHECK-LABEL: umaxv_v32i64: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].d, vl32 -; VBITS_GE_2096-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_2096-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <32 x i64>, <32 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.umax.v32i64(<32 x i64> %op) ret i64 %res @@ -1212,11 +1212,11 @@ define i16 @uminv_v32i16(<32 x i16>* %a) #0 { define i16 @uminv_v64i16(<64 x i16>* %a) #0 { ; CHECK-LABEL: uminv_v64i16: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].h, vl64 -; VBITS_GE_1048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: uminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h -; VBITS_GE_1048-NEXT: fmov w0, s[[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: uminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <64 x i16>, <64 x i16>* %a %res = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %op) ret i16 %res @@ -1278,11 +1278,11 @@ define i32 @uminv_v16i32(<16 x i32>* %a) #0 { define i32 @uminv_v32i32(<32 x i32>* %a) #0 { ; CHECK-LABEL: uminv_v32i32: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].s, vl32 -; VBITS_GE_1048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: uminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_1048-NEXT: fmov w0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: uminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <32 x i32>, <32 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %op) ret i32 %res @@ -1290,11 +1290,11 @@ define i32 @uminv_v32i32(<32 x i32>* %a) #0 { define i32 @uminv_v64i32(<64 x i32>* %a) #0 { ; CHECK-LABEL: uminv_v64i32: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].s, vl64 -; VBITS_GE_2096-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: uminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s -; VBITS_GE_2086-NEXT: fmov w0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: uminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <64 x i32>, <64 x i32>* %a %res = call i32 @llvm.experimental.vector.reduce.umin.v64i32(<64 x i32> %op) ret i32 %res @@ -1346,11 +1346,11 @@ define i64 @uminv_v8i64(<8 x i64>* %a) #0 { define i64 @uminv_v16i64(<16 x i64>* %a) #0 { ; CHECK-LABEL: uminv_v16i64: -; VBITS_GE_1048: ptrue [[PG:p[0-9]+]].d, vl16 -; VBITS_GE_1048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_1048-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_1048-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_1048-NEXT: ret +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_1024-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %op) ret i64 %res @@ -1358,11 +1358,11 @@ define i64 @uminv_v16i64(<16 x i64>* %a) #0 { define i64 @uminv_v32i64(<32 x i64>* %a) #0 { ; CHECK-LABEL: uminv_v32i64: -; VBITS_GE_2096: ptrue [[PG:p[0-9]+]].d, vl32 -; VBITS_GE_2096-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_GE_2096-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d -; VBITS_GE_2096-NEXT: fmov x0, [[REDUCE]] -; VBITS_GE_2096-NEXT: ret +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]] +; VBITS_GE_2048-NEXT: ret %op = load <32 x i64>, <32 x i64>* %a %res = call i64 @llvm.experimental.vector.reduce.umin.v32i64(<32 x i64> %op) ret i64 %res From e456df77c2a5a2bf905f6848a09faf69b49c5752 Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Mon, 28 Sep 2020 17:21:00 -0700 Subject: [PATCH 035/544] [scudo][standalone] Fix Primary's ReleaseToOS test Said test was flaking on Fuchsia for non-obvious reasons, and only for ASan variants (the release was returning 0). It turned out that the templating was off, `true` being promoted to a `s32` and used as the minimum interval argument. This meant that in some circumstances, the normal release would occur, and the forced release would have nothing to release, hence the 0 byte released. The symbols are giving it away (note the 1): ``` scudo::SizeClassAllocator64,24ul,1,2147483647,false>::releaseToOS(void) ``` This also probably means that there was no MTE version of that test! Differential Revision: https://reviews.llvm.org/D88457 --- .../lib/scudo/standalone/tests/primary_test.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 605ce44d49739..67d1fe52acef9 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -58,7 +58,8 @@ TEST(ScudoPrimaryTest, BasicPrimary) { testPrimary>(); #endif testPrimary>(); - testPrimary>(); + testPrimary>(); } // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes. @@ -144,7 +145,8 @@ TEST(ScudoPrimaryTest, PrimaryIterate) { testIteratePrimary>(); #endif testIteratePrimary>(); - testIteratePrimary>(); + testIteratePrimary>(); } static std::mutex Mutex; @@ -205,7 +207,8 @@ TEST(ScudoPrimaryTest, PrimaryThreaded) { testPrimaryThreaded>(); #endif testPrimaryThreaded>(); - testPrimaryThreaded>(); + testPrimaryThreaded>(); } // Through a simple allocation that spans two pages, verify that releaseToOS @@ -236,5 +239,6 @@ TEST(ScudoPrimaryTest, ReleaseToOS) { testReleaseToOS>(); #endif testReleaseToOS>(); - testReleaseToOS>(); + testReleaseToOS>(); } From c816ee13ad9e14ec1b69e07b1d3851d8b548ff8c Mon Sep 17 00:00:00 2001 From: Manoel Roemmer Date: Tue, 29 Sep 2020 16:21:09 +0200 Subject: [PATCH 036/544] [OpenMP][VE plugin] Fixing failure to build VE plugin with consolidated error handling in libomptarget The libomptarget VE plugin [[ http://lab.llvm.org:8014/builders/clang-ve-ninja/builds/8937/steps/build-unified-tree/logs/stdio | fails zu build ]] after ae95ceeb8f98d81f615c69da02f73b5ee6b1519a . Differential Revision: https://reviews.llvm.org/D88476 --- openmp/libomptarget/plugins/ve/src/rtl.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp index 414fc62650fbc..1994389deb423 100644 --- a/openmp/libomptarget/plugins/ve/src/rtl.cpp +++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp @@ -11,9 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "Debug.h" -#include "omptargetplugin.h" - #include #include #include @@ -26,14 +23,19 @@ #include #include -#ifndef TARGET_ELF_ID -#define TARGET_ELF_ID 0 -#endif +#include "Debug.h" +#include "omptargetplugin.h" +#ifndef TARGET_NAME #define TARGET_NAME VE +#endif #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL" +#ifndef TARGET_ELF_ID +#define TARGET_ELF_ID 0 +#endif + #include "../../common/elf_common.c" struct DynLibTy { From 30c0bea5714400d52ab50693e4a7e738ab789400 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 16:15:35 +0100 Subject: [PATCH 037/544] SparcSubtarget.h - cleanup include dependencies. NFCI. TargetFrameLowering.h is guaranteed to be covered by SparcFrameLowering.h Fix missing implicit Triple.h dependency. --- llvm/lib/Target/Sparc/SparcSubtarget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.h b/llvm/lib/Target/Sparc/SparcSubtarget.h index 671db01e8321d..82a4aa510355a 100644 --- a/llvm/lib/Target/Sparc/SparcSubtarget.h +++ b/llvm/lib/Target/Sparc/SparcSubtarget.h @@ -16,8 +16,8 @@ #include "SparcFrameLowering.h" #include "SparcISelLowering.h" #include "SparcInstrInfo.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include From 8f34216ece714f30f08092d108b85c0e7e77384a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 16:29:51 +0100 Subject: [PATCH 038/544] NVPTXTargetMachine.h - remove unused includes. NFCI. --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index d84600c74e292..5b1e77958eb1b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -15,8 +15,6 @@ #include "ManagedStringPool.h" #include "NVPTXSubtarget.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { From a06581ef39284f8ec66778807adf0e2d244a33d1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 16:36:58 +0100 Subject: [PATCH 039/544] MSP430TargetMachine.h - remove unused includes. NFCI. --- llvm/lib/Target/MSP430/MSP430TargetMachine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/llvm/lib/Target/MSP430/MSP430TargetMachine.h index 96fbc3ba03779..ef757dc7cb78a 100644 --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.h +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.h @@ -15,10 +15,10 @@ #define LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H #include "MSP430Subtarget.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { +class StringRef; /// MSP430TargetMachine /// From a9f63d22fafb0d7de768efc6b7447f8e7f6bb220 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 29 Sep 2020 17:06:13 +0200 Subject: [PATCH 040/544] [clangd] Disable msan instrumentation for generated Evaluate(). MSAN build times out for generated DecisionForest inference runtime. A solution worth trying is splitting the function into 300 smaller functions and then re-enable msan. For now we are disabling instrumentation for the generated function. Differential Revision: https://reviews.llvm.org/D88495 --- clang-tools-extra/clangd/quality/CompletionModelCodegen.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py index 20bfccd8806f5..423e5d14cf523 100644 --- a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py +++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py @@ -145,6 +145,7 @@ class can be used to represent a code completion candidate. return """#ifndef %s #define %s #include +#include "llvm/Support/Compiler.h" %s class %s { @@ -160,6 +161,9 @@ class %s { friend float Evaluate(const %s&); }; +// The function may have large number of lines of code. MSAN +// build times out in such case. +LLVM_NO_SANITIZE("memory") float Evaluate(const %s&); %s #endif // %s From 119274748bce6d1248aa57cb55d79bfeae8a2f8e Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Tue, 29 Sep 2020 08:48:07 -0700 Subject: [PATCH 041/544] NFC, add a missing stdlib include for the use of abort The FatalErrorHandler.cpp file uses 'abort', but doesn't include 'stdlib.h'. This causes a build error when modules are used in clang. --- clang/tools/libclang/FatalErrorHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/tools/libclang/FatalErrorHandler.cpp b/clang/tools/libclang/FatalErrorHandler.cpp index 6b435fcfcc954..ef21569637f03 100644 --- a/clang/tools/libclang/FatalErrorHandler.cpp +++ b/clang/tools/libclang/FatalErrorHandler.cpp @@ -9,6 +9,7 @@ #include "clang-c/FatalErrorHandler.h" #include "llvm/Support/ErrorHandling.h" +#include static void aborting_fatal_error_handler(void *, const std::string &reason, bool) { From d0ed45dc920004bb7b6642d6086b4722443eeba2 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 08:56:27 -0700 Subject: [PATCH 042/544] [lldb] Configure LLDB_FRAMEWORK_DIR in multi-generator builds --- lldb/test/API/CMakeLists.txt | 1 + lldb/utils/lldb-dotest/CMakeLists.txt | 5 +++++ lldb/utils/lldb-dotest/lldb-dotest.in | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index f07370adb5c28..6c7f54e39123c 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -139,6 +139,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} config_runtime_output_dir ${LLVM_RUNTIME_OUTPUT_INTDIR}) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_DOTEST_ARGS "${LLDB_DOTEST_ARGS}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index e5a73c2b1dec3..2f9ba72d7b223 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -21,6 +21,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} config_runtime_output_dir ${LLVM_RUNTIME_OUTPUT_INTDIR}) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_DOTEST_ARGS_CONFIGURED "${LLDB_DOTEST_ARGS}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") @@ -33,6 +34,7 @@ if(LLDB_BUILT_STANDALONE) # Multi-configuration generator like Xcode (with a matching config). string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_DOTEST_ARGS_CONFIGURED "${LLDB_DOTEST_ARGS}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") @@ -44,6 +46,7 @@ if(LLDB_BUILT_STANDALONE) # Single-configuration generator like Ninja. string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_DOTEST_ARGS_CONFIGURED "${LLDB_DOTEST_ARGS}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") @@ -63,6 +66,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_DOTEST_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_DOTEST_ARGS_CONFIGURED "${LLDB_DOTEST_ARGS}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") @@ -79,6 +83,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") else() set(LLDB_DOTEST_ARGS_CONFIGURED "${LLDB_DOTEST_ARGS}") set(LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") + set(LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") set(LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") set(LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") set(LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in index 86f05bea9bdcb..fedb56e938fe4 100755 --- a/lldb/utils/lldb-dotest/lldb-dotest.in +++ b/lldb/utils/lldb-dotest/lldb-dotest.in @@ -12,7 +12,7 @@ dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@' filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@' yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@' lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" -lldb_framework_dir = "@LLDB_FRAMEWORK_DIR@" +lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@" lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@" if __name__ == '__main__': From 3e5f9dacb092a1414f72500111c2b049673e0055 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Sep 2020 10:49:52 -0400 Subject: [PATCH 043/544] [libc++] Fix tests on GCC 10 Also, remove workarounds for ancient Clangs from is_constructible tests. --- libcxx/include/type_traits | 5 +- .../namespace/addressable_functions.sh.cpp | 2 +- .../meta.unary.prop/is_constructible.pass.cpp | 51 ++----------------- .../tuple.creation/tuple_cat.pass.cpp | 2 +- libcxx/utils/ci/run-buildbot.sh | 4 +- 5 files changed, 11 insertions(+), 53 deletions(-) diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 8658272c032c3..03556389e2c6c 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -2883,8 +2883,7 @@ namespace __is_construct struct __nat {}; } -#if !defined(_LIBCPP_CXX03_LANG) && (!__has_feature(is_constructible) || \ - defined(_LIBCPP_TESTING_FALLBACK_IS_CONSTRUCTIBLE)) +#if !defined(_LIBCPP_CXX03_LANG) && !__has_feature(is_constructible) && !defined(_LIBCPP_COMPILER_GCC) template struct __libcpp_is_constructible; @@ -2999,7 +2998,7 @@ struct __libcpp_is_constructible<_Tp&&, _A0> #endif -#if __has_feature(is_constructible) +#if __has_feature(is_constructible) || defined(_LIBCPP_COMPILER_GCC) template struct _LIBCPP_TEMPLATE_VIS is_constructible : public integral_constant diff --git a/libcxx/test/std/namespace/addressable_functions.sh.cpp b/libcxx/test/std/namespace/addressable_functions.sh.cpp index fb731abf306ca..72db27ffaf20a 100644 --- a/libcxx/test/std/namespace/addressable_functions.sh.cpp +++ b/libcxx/test/std/namespace/addressable_functions.sh.cpp @@ -14,7 +14,7 @@ // RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu1.o -DTU1 // RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu2.o -DTU2 -// RUN: %{cxx} %{flags} %{link_flags} %t.tu1.o %t.tu2.o -o %t.exe +// RUN: %{cxx} %t.tu1.o %t.tu2.o %{flags} %{link_flags} -o %t.exe // RUN: %{exec} %t.exe #include diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp index e3f83135fe12e..e4fad7cd36c94 100644 --- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp @@ -11,8 +11,6 @@ // template // struct is_constructible; -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_TESTING_FALLBACK_IS_CONSTRUCTIBLE - #include #include "test_macros.h" @@ -22,7 +20,6 @@ #define LIBCPP11_STATIC_ASSERT(...) ((void)0) #endif - struct A { explicit A(int); @@ -78,7 +75,6 @@ template void test_is_constructible() { static_assert( (std::is_constructible::value), ""); - LIBCPP11_STATIC_ASSERT((std::__libcpp_is_constructible::type::value), ""); #if TEST_STD_VER > 14 static_assert( std::is_constructible_v, ""); #endif @@ -88,7 +84,6 @@ template void test_is_constructible() { static_assert(( std::is_constructible::value), ""); - LIBCPP11_STATIC_ASSERT((std::__libcpp_is_constructible::type::value), ""); #if TEST_STD_VER > 14 static_assert(( std::is_constructible_v), ""); #endif @@ -98,7 +93,6 @@ template void test_is_constructible() { static_assert(( std::is_constructible::value), ""); - LIBCPP11_STATIC_ASSERT((std::__libcpp_is_constructible::type::value), ""); #if TEST_STD_VER > 14 static_assert(( std::is_constructible_v), ""); #endif @@ -108,7 +102,6 @@ template void test_is_constructible() { static_assert(( std::is_constructible::value), ""); - LIBCPP11_STATIC_ASSERT((std::__libcpp_is_constructible::type::value), ""); #if TEST_STD_VER > 14 static_assert(( std::is_constructible_v), ""); #endif @@ -118,7 +111,6 @@ template void test_is_not_constructible() { static_assert((!std::is_constructible::value), ""); - LIBCPP11_STATIC_ASSERT((!std::__libcpp_is_constructible::type::value), ""); #if TEST_STD_VER > 14 static_assert((!std::is_constructible_v), ""); #endif @@ -128,23 +120,11 @@ template void test_is_not_constructible() { static_assert((!std::is_constructible::value), ""); - LIBCPP11_STATIC_ASSERT((!std::__libcpp_is_constructible::type::value), ""); #if TEST_STD_VER > 14 static_assert((!std::is_constructible_v), ""); #endif } -#if TEST_STD_VER >= 11 -template (std::declval()))> -constexpr bool clang_disallows_valid_static_cast_test(int) { return false; }; - -constexpr bool clang_disallows_valid_static_cast_test(long) { return true; } - -static constexpr bool clang_disallows_valid_static_cast_bug = - clang_disallows_valid_static_cast_test(0); -#endif - - int main(int, char**) { typedef Base B; @@ -210,13 +190,17 @@ int main(int, char**) test_is_constructible(); test_is_not_constructible(); test_is_constructible(); +#ifndef TEST_COMPILER_GCC test_is_not_constructible(); test_is_not_constructible(); +#endif test_is_constructible(); test_is_constructible(); +#ifndef TEST_COMPILER_GCC test_is_not_constructible(); test_is_not_constructible(); +#endif // test that T must also be destructible test_is_constructible(); @@ -255,28 +239,11 @@ int main(int, char**) #endif static_assert(std::is_constructible>::value, ""); -#ifdef __clang__ -#if defined(CLANG_TEST_VER) && CLANG_TEST_VER < 400 - static_assert(clang_disallows_valid_static_cast_bug, "bug still exists"); -#endif - // FIXME Clang disallows this construction because it thinks that - // 'static_cast(declval>())' is ill-formed. - LIBCPP_STATIC_ASSERT( - clang_disallows_valid_static_cast_bug != - std::__libcpp_is_constructible>::value, ""); - ((void)clang_disallows_valid_static_cast_bug); // Prevent unused warning -#else - static_assert(clang_disallows_valid_static_cast_bug == false, ""); - LIBCPP_STATIC_ASSERT(std::__libcpp_is_constructible>::value, ""); -#endif #ifdef __clang__ // FIXME Clang and GCC disagree on the validity of this expression. test_is_constructible>(); static_assert(std::is_constructible>::value, ""); - LIBCPP_STATIC_ASSERT( - clang_disallows_valid_static_cast_bug != - std::__libcpp_is_constructible>::value, ""); #else test_is_not_constructible>(); test_is_not_constructible>(); @@ -287,21 +254,11 @@ int main(int, char**) test_is_not_constructible>(); test_is_not_constructible>(); - -// TODO: Remove this workaround once Clang <= 3.7 are no longer used regularly. -// In those compiler versions the __is_constructible builtin gives the wrong -// results for abominable function types. -#if (defined(TEST_APPLE_CLANG_VER) && TEST_APPLE_CLANG_VER < 703) \ - || (defined(TEST_CLANG_VER) && TEST_CLANG_VER < 308) -#define WORKAROUND_CLANG_BUG -#endif -#if !defined(WORKAROUND_CLANG_BUG) test_is_not_constructible(); test_is_not_constructible (); test_is_not_constructible (); test_is_not_constructible (); test_is_not_constructible (); -#endif #endif // TEST_STD_VER >= 11 return 0; diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.creation/tuple_cat.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.creation/tuple_cat.pass.cpp index c6f8d5258e07c..00c9d27ccc6d0 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.creation/tuple_cat.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.creation/tuple_cat.pass.cpp @@ -263,7 +263,7 @@ int main(int, char**) ((void)r); } { - std::tuple t1({1}); + std::tuple t1(NS::Namespaced{1}); std::tuple t = std::tuple_cat(t1); std::tuple t2 = std::tuple_cat(t1, t1); diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh index 4dd1d485d4c57..d4972b098a2cb 100755 --- a/libcxx/utils/ci/run-buildbot.sh +++ b/libcxx/utils/ci/run-buildbot.sh @@ -57,7 +57,9 @@ x86_64-ubuntu-32bit) x86_64-ubuntu-gcc) export CC=gcc export CXX=g++ - args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + # FIXME: Re-enable experimental testing on GCC. GCC cares about the order + # in which we link -lc++experimental, which causes issues. + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param enable_experimental=False") ;; x86_64-ubuntu-asan) export CC=clang From ccbb9827db4c30c93b92a204aeb2b98f9f3a723a Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 09:12:29 -0700 Subject: [PATCH 044/544] [lldb] Also configure lldb_framework_dir in the lit.site.cfg.py Configuring the variable in CMake isn't enought, because the build mode can't be resolved until execution time, which requires the build mode to be substituted by lit. --- lldb/test/API/lit.site.cfg.py.in | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index f2e1f855fe390..144d17965b9ad 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -59,6 +59,7 @@ try: config.dsymutil = config.dsymutil % lit_config.params config.filecheck = config.filecheck % lit_config.params config.yaml2obj = config.yaml2obj % lit_config.params + config.lldb_framework_dir = config.lldb_framework_dir % lit_config.params config.dotest_args_str = config.dotest_args_str % lit_config.params except KeyError as e: key, = e.args From b610d73b3fe67fe6b693740dfac5fd21a60b1e44 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 16:54:13 +0100 Subject: [PATCH 045/544] [InstCombine] visitTrunc - remove dead trunc(lshr (zext A), C) combine. NFCI. I added additional test coverage at rG7a55989dc4305 - but all are handled independently of this combine and http://lab.llvm.org:8080/coverage/coverage-reports/ indicates the code is never used. Differential revision: https://reviews.llvm.org/D88492 --- .../InstCombine/InstCombineCasts.cpp | 27 +------------------ 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index fb885790d448e..1da6d0c2a92ae 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -801,32 +801,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { } } - // FIXME: Maybe combine the next two transforms to handle the no cast case - // more efficiently. Support vector types. Cleanup code by using m_OneUse. - - // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. - Value *A = nullptr; - if (Src->hasOneUse() && - match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { - // We have three types to worry about here, the type of A, the source of - // the truncate (MidSize), and the destination of the truncate. We know that - // ASize < MidSize and MidSize > ResultSize, but don't know the relation - // between ASize and ResultSize. - unsigned ASize = A->getType()->getPrimitiveSizeInBits(); - - // If the shift amount is larger than the size of A, then the result is - // known to be zero because all the input bits got shifted out. - if (Cst->getZExtValue() >= ASize) - return replaceInstUsesWith(Trunc, Constant::getNullValue(DestTy)); - - // Since we're doing an lshr and a zero extend, and know that the shift - // amount is smaller than ASize, it is always safe to do the shift in A's - // type, then zero extend or truncate to the result. - Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue()); - Shift->takeName(Src); - return CastInst::CreateIntegerCast(Shift, DestTy, false); - } - + Value *A; Constant *C; if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) { unsigned AWidth = A->getType()->getScalarSizeInBits(); From e5f047f27ec121e63e765d30683a472d1ba19ca5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 17:15:00 +0100 Subject: [PATCH 046/544] [InstCombine] Fix the outofrange tests and add exact shift tests for D88429 --- .../InstCombine/trunc-shift-trunc.ll | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index 0838e82f32c73..34050250db739 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -62,12 +62,12 @@ define <2 x i8> @trunc_lshr_trunc_uniform_undef(<2 x i64> %a) { define i8 @trunc_lshr_trunc_outofrange(i64 %a) { ; CHECK-LABEL: @trunc_lshr_trunc_outofrange( ; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 9 +; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 25 ; CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 - %c = lshr i32 %b, 9 + %c = lshr i32 %b, 25 %d = trunc i32 %c to i8 ret i8 %d } @@ -75,12 +75,12 @@ define i8 @trunc_lshr_trunc_outofrange(i64 %a) { define <2 x i8> @trunc_lshr_trunc_nonuniform_outofrange(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_nonuniform_outofrange( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = lshr <2 x i32> %b, + %c = lshr <2 x i32> %b, %d = trunc <2 x i32> %c to <2 x i8> ret <2 x i8> %d } @@ -98,6 +98,19 @@ define i8 @trunc_ashr_trunc(i64 %a) { ret i8 %d } +define i8 @trunc_ashr_trunc_exact(i64 %a) { +; CHECK-LABEL: @trunc_ashr_trunc_exact( +; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[B]], 8 +; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[D]] +; + %b = trunc i64 %a to i32 + %c = ashr exact i32 %b, 8 + %d = trunc i32 %c to i8 + ret i8 %d +} + define <2 x i8> @trunc_ashr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_uniform( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> @@ -140,12 +153,12 @@ define <2 x i8> @trunc_ashr_trunc_uniform_undef(<2 x i64> %a) { define i8 @trunc_ashr_trunc_outofrange(i64 %a) { ; CHECK-LABEL: @trunc_ashr_trunc_outofrange( ; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[B]], 9 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: [[C:%.*]] = ashr i32 [[B]], 25 +; CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 - %c = ashr i32 %b, 9 + %c = ashr i32 %b, 25 %d = trunc i32 %c to i8 ret i8 %d } @@ -153,12 +166,12 @@ define i8 @trunc_ashr_trunc_outofrange(i64 %a) { define <2 x i8> @trunc_ashr_trunc_nonuniform_outofrange(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_nonuniform_outofrange( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[B]], +; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[B]], ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = ashr <2 x i32> %b, + %c = ashr <2 x i32> %b, %d = trunc <2 x i32> %c to <2 x i8> ret <2 x i8> %d } From 7bed95a856f1e0146e838e7575677159f42f3bf7 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Tue, 29 Sep 2020 12:17:26 -0400 Subject: [PATCH 047/544] [libc++] Add a regression test for erasing from a vector After rebasing my trivially-relocatable branch, this behavior was broken... but no libc++ unit test caught it! Add a regression test specifically for erasing out of a vector. Differential Revision: https://reviews.llvm.org/D88421 --- .../vector/vector.modifiers/erase_iter.pass.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp index fb9b4bfbe4ef7..1d58d319b5bc1 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp @@ -35,6 +35,21 @@ bool Throws::sThrows = false; int main(int, char**) { + { + int a1[] = {1, 2, 3, 4, 5}; + std::vector l1(a1, a1+5); + l1.erase(l1.begin()); + assert(is_contiguous_container_asan_correct(l1)); + assert(l1 == std::vector(a1+1, a1+5)); + } + { + int a1[] = {1, 2, 3, 4, 5}; + int e1[] = {1, 3, 4, 5}; + std::vector l1(a1, a1+5); + l1.erase(l1.begin() + 1); + assert(is_contiguous_container_asan_correct(l1)); + assert(l1 == std::vector(e1, e1+4)); + } { int a1[] = {1, 2, 3}; std::vector l1(a1, a1+3); From d2d7a44facd2dc895d378f19233837147f587b6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 29 Sep 2020 11:37:35 -0500 Subject: [PATCH 048/544] [flang][msvc] Avoid templated initializer list initialization of vector. NFC. The Microsoft compiler emits an error when populating the vector with a single element of a templated argument using the brace syntax. The error is: ``` constant.h(102,1): error C2664: 'std::vector, ...>::vector(std::initializer_list<_Ty>,const _Alloc &)': cannot convert argument 1 from 'initializer list' to 'std::initializer_list<_Ty>' ``` To work around this error, we replace the templated constructor with one for the expected type. Conversion to the element type has to be done by the caller. This patch is part of the series to make flang compilable with MS Visual Studio . Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D88163 --- flang/include/flang/Evaluate/constant.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/include/flang/Evaluate/constant.h b/flang/include/flang/Evaluate/constant.h index a25916f94ef79..a9f6e87c9db03 100644 --- a/flang/include/flang/Evaluate/constant.h +++ b/flang/include/flang/Evaluate/constant.h @@ -97,8 +97,7 @@ class ConstantBase : public ConstantBounds { template ConstantBase(const A &x, Result res = Result{}) : result_{res}, values_{x} {} - template > - ConstantBase(A &&x, Result res = Result{}) + ConstantBase(ELEMENT &&x, Result res = Result{}) : result_{res}, values_{std::move(x)} {} ConstantBase( std::vector &&, ConstantSubscripts &&, Result = Result{}); From a9abe1f7859e4e1293969a93213294584a5aaba9 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Mon, 28 Sep 2020 17:17:23 -0700 Subject: [PATCH 049/544] [COFF][CG Profile] set undefined symbol to external Differential Revision: https://reviews.llvm.org/D88456 --- llvm/lib/MC/MCWinCOFFStreamer.cpp | 4 +--- llvm/test/MC/COFF/cgprofile.s | 28 +++------------------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index d8fde4004d44b..520d4a0246915 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -340,10 +340,8 @@ void MCWinCOFFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) { const MCSymbol *S = &SRE->getSymbol(); bool Created; getAssembler().registerSymbol(*S, &Created); - if (Created) { - cast(S)->setIsWeakExternal(); + if (Created) cast(S)->setExternal(true); - } } void MCWinCOFFStreamer::finalizeCGProfile() { diff --git a/llvm/test/MC/COFF/cgprofile.s b/llvm/test/MC/COFF/cgprofile.s index a0c47a69c0692..c2f95038b370e 100644 --- a/llvm/test/MC/COFF/cgprofile.s +++ b/llvm/test/MC/COFF/cgprofile.s @@ -30,7 +30,7 @@ late3: # CHECK-NEXT: ] # CHECK-NEXT: SectionData ( # CHECK-NEXT: 0000: 0A000000 0E000000 20000000 00000000 -# CHECK-NEXT: 0010: 11000000 0A000000 0B000000 00000000 +# CHECK-NEXT: 0010: 0F000000 0A000000 0B000000 00000000 # CHECK-NEXT: 0020: 0B000000 0C000000 14000000 00000000 # CHECK-NEXT: ) @@ -68,37 +68,15 @@ late3: # CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED # CHECK-NEXT: BaseType: # CHECK-NEXT: ComplexType: -# CHECK-NEXT: StorageClass: WeakExternal -# CHECK-NEXT: AuxSymbolCount: 1 -# CHECK-NEXT: AuxWeakExternal { -# CHECK-NEXT: Linked: .weak.b.default.late -# CHECK-NEXT: Search: Alias -# CHECK-NEXT: } -# CHECK: Name: .weak.b.default.late -# CHECK-NEXT: Value: -# CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE -# CHECK-NEXT: BaseType: -# CHECK-NEXT: ComplexType: # CHECK-NEXT: StorageClass: External -# CHECK-NEXT: AuxSymbolCount: 0 +# CHECK-NEXT: AuxSymbolCount: # CHECK: Name: freq # CHECK-NEXT: Value: # CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED # CHECK-NEXT: BaseType: # CHECK-NEXT: ComplexType: -# CHECK-NEXT: StorageClass: WeakExternal -# CHECK-NEXT: AuxSymbolCount: 1 -# CHECK-NEXT: AuxWeakExternal { -# CHECK-NEXT: Linked: .weak.freq.default.late -# CHECK-NEXT: Search: Alias -# CHECK-NEXT: } -# CHECK: Name: .weak.freq.default.late -# CHECK-NEXT: Value: -# CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE -# CHECK-NEXT: BaseType: -# CHECK-NEXT: ComplexType: # CHECK-NEXT: StorageClass: External -# CHECK-NEXT: AuxSymbolCount: 0 +# CHECK-NEXT: AuxSymbolCount: # CHECK: CGProfile [ # CHECK-NEXT: CGProfileEntry { From 2159ed811f96cb1aefecf3369d80e69e06fd32a3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Sep 2020 12:48:44 -0400 Subject: [PATCH 050/544] [libc++][ci] Update how we build the Docker image This fixes a couple of issues, such as failing filesystem tests (due to running the tests as root), and not running with the GCC we downloaded. --- libcxx/utils/ci/Dockerfile | 44 ++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index a83936b47083d..f0de9f64ba913 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -24,12 +24,33 @@ # as /llvm in the container. Be careful, the state in /llvm is shared between # the container and the host machine. # +# Finally, a pre-built version of this image is available on DockerHub as +# ldionne/libcxx-builder. To use the pre-built version of the image, use +# +# $ docker pull ldionne/libcxx-builder +# $ docker run -it ldionne/libcxx-builder +# +# To update the image, rebuild it and push it to ldionne/libcxx-builder (which +# will obviously only work if you have permission to do so). +# +# $ docker build -t ldionne/libcxx-builder . +# $ docker push ldionne/libcxx-builder +# FROM ubuntu:bionic RUN apt-get update RUN apt-get install -y bash curl +# Install various tools used by the build or the test suite +RUN apt-get install -y ninja-build python3 sphinx-doc git + +# Install the Phabricator Python module to allow uploading results to Phabricator. +# This MUST be done before installing a recent GCC, otherwise /usr/bin/gcc is +# overwritten to an older GCC. +RUN apt-get install -y python3-pip +RUN pip3 install phabricator + # Install the most recently released LLVM RUN apt-get install -y lsb-release wget software-properties-common RUN bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" @@ -39,25 +60,26 @@ RUN ln -s $(find /usr/bin -regex '.+/clang-[a-zA-Z0-9.]+') /usr/bin/clang # Install a recent GCC RUN add-apt-repository ppa:ubuntu-toolchain-r/test RUN apt-get update && apt install -y gcc-10 g++-10 -RUN ln -s $(find /usr/bin -regex '.+/g\+\+-[a-zA-Z0-9.]+') /usr/bin/g++ -RUN ln -s $(find /usr/bin -regex '.+/gcc-[a-zA-Z0-9.]+') /usr/bin/gcc +RUN ln -f -s /usr/bin/g++-10 /usr/bin/g++ +RUN ln -f -s /usr/bin/gcc-10 /usr/bin/gcc # Install a recent CMake RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.2/cmake-3.18.2-Linux-x86_64.sh -O /tmp/install-cmake.sh RUN bash /tmp/install-cmake.sh --prefix=/usr --exclude-subdir --skip-license RUN rm /tmp/install-cmake.sh -# Install other tools used by the build or the test suite -RUN apt-get install -y ninja-build python3 sphinx-doc +# Change the user to a non-root user, since some of the libc++ tests +# (e.g. filesystem) require running as non-root. Also setup passwordless sudo. +RUN apt-get install -y sudo +RUN echo "ALL ALL = (ALL) NOPASSWD: ALL" >> /etc/sudoers +RUN useradd --create-home libcxx-builder +USER libcxx-builder +WORKDIR /home/libcxx-builder -# Install the Buildkite agent and dependencies +# Install the Buildkite agent and dependencies. This must be done as non-root +# for the Buildkite agent to be installed in a path where we can find it. RUN bash -c "$(curl -sL https://raw.githubusercontent.com/buildkite/agent/master/install.sh)" -RUN apt-get install -y git -ENV PATH="${PATH}:/root/.buildkite-agent/bin" - -# Install the Phabricator Python module to allow uploading results to Phabricator -RUN apt-get install -y python3-pip -RUN pip3 install phabricator +ENV PATH="${PATH}:/home/libcxx-builder/.buildkite-agent/bin" # By default, start the Buildkite agent (this requires a token). CMD buildkite-agent start --tags "queue=libcxx-builders" From d8ba6b4ab3eceb6bbcdf4371d4ffaab9d1a5cebe Mon Sep 17 00:00:00 2001 From: Aleksandr Platonov Date: Tue, 29 Sep 2020 19:54:33 +0300 Subject: [PATCH 051/544] [clangd] findNearbyIdentifier(): guaranteed to give up after 2^N lines As @kadircet mentions in D84912#2184144, `findNearbyIdentifier()` traverses the whole file if there is no identifier for the word. This patch ensures give up after 2^N lines in any case. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D87891 --- clang-tools-extra/clangd/XRefs.cpp | 29 ++++++++++++++----- .../clangd/unittests/XRefsTests.cpp | 5 ++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 9e8791c2a7651..9532e1418cca7 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -562,19 +562,34 @@ const syntax::Token *findNearbyIdentifier(const SpelledWord &Word, auto Cost = [&](SourceLocation Loc) -> unsigned { assert(SM.getFileID(Loc) == File && "spelled token in wrong file?"); unsigned Line = SM.getSpellingLineNumber(Loc); - if (Line > WordLine) - return 1 + llvm::Log2_64(Line - WordLine); - if (Line < WordLine) - return 2 + llvm::Log2_64(WordLine - Line); - return 0; + return Line >= WordLine ? Line - WordLine : 2 * (WordLine - Line); }; const syntax::Token *BestTok = nullptr; - // Search bounds are based on word length: 2^N lines forward. - unsigned BestCost = Word.Text.size() + 1; + unsigned BestCost = -1; + // Search bounds are based on word length: + // - forward: 2^N lines + // - backward: 2^(N-1) lines. + unsigned MaxDistance = + 1U << std::min(Word.Text.size(), + std::numeric_limits::digits - 1); + // Line number for SM.translateLineCol() should be one-based, also + // SM.translateLineCol() can handle line number greater than + // number of lines in the file. + // - LineMin = max(1, WordLine + 1 - 2^(N-1)) + // - LineMax = WordLine + 1 + 2^N + unsigned LineMin = + WordLine + 1 <= MaxDistance / 2 ? 1 : WordLine + 1 - MaxDistance / 2; + unsigned LineMax = WordLine + 1 + MaxDistance; + SourceLocation LocMin = SM.translateLineCol(File, LineMin, 1); + assert(LocMin.isValid()); + SourceLocation LocMax = SM.translateLineCol(File, LineMax, 1); + assert(LocMax.isValid()); // Updates BestTok and BestCost if Tok is a good candidate. // May return true if the cost is too high for this token. auto Consider = [&](const syntax::Token &Tok) { + if (Tok.location() < LocMin || Tok.location() > LocMax) + return true; // we are too far from the word, break the outer loop. if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word.Text)) return false; // No point guessing the same location we started with. diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index a48bb9c8c182f..40637b5fa7582 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -1428,6 +1428,11 @@ TEST(LocateSymbol, NearbyIdentifier) { // h^i + + + + + int x = hi; )cpp", R"cpp( // prefer nearest occurrence even if several matched tokens From b4968c7001c2d7e2e607bef1bb11ae8f076e47e0 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 10:04:15 -0700 Subject: [PATCH 052/544] [lldb] Remove redundant ctor call (NFC) As pointed out by Pavel in D88249. --- lldb/source/API/SBValue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index 63518b4c4e3eb..0a95cf41263dd 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -1356,7 +1356,7 @@ lldb::SBAddress SBValue::GetAddress() { } } - return LLDB_RECORD_RESULT(SBAddress(Address(addr))); + return LLDB_RECORD_RESULT(SBAddress(addr)); } lldb::SBData SBValue::GetPointeeData(uint32_t item_idx, uint32_t item_count) { From 9c77350b0c737f44732ee26b558b5f4868864a38 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 29 Sep 2020 13:12:54 -0400 Subject: [PATCH 053/544] [mlir][openacc] Add shutdown operation This patch introduces the acc.shutdown operation that represents an OpenACC shutdown directive. Clauses are derived from the spec 2.14.2 Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D88272 --- .../mlir/Dialect/OpenACC/OpenACCOps.td | 30 +++++++++++++++++++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 19 +++++++++++- mlir/test/Dialect/OpenACC/invalid.mlir | 28 ++++++++++++++++- mlir/test/Dialect/OpenACC/ops.mlir | 28 +++++++++++++++++ 4 files changed, 103 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 0d8efcc456b44..779f588cf5b17 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -348,6 +348,36 @@ def OpenACC_InitOp : OpenACC_Op<"init", [AttrSizedOperandSegments]> { }]; } +//===----------------------------------------------------------------------===// +// 2.14.2. Shutdown +//===----------------------------------------------------------------------===// + +def OpenACC_ShutdownOp : OpenACC_Op<"shutdown", [AttrSizedOperandSegments]> { + let summary = "shutdown operation"; + + let description = [{ + The "acc.shutdown" operation represents the OpenACC shutdown executable + directive. + + Example: + + ```mlir + acc.shutdown + acc.shutdown device_num(%dev1 : i32) + ``` + }]; + + let arguments = (ins Variadic:$deviceTypeOperands, + Optional:$deviceNumOperand, + Optional:$ifCond); + + let assemblyFormat = [{ + ( `device_type` `(` $deviceTypeOperands^ `:` type($deviceTypeOperands) `)` )? + ( `device_num` `(` $deviceNumOperand^ `:` type($deviceNumOperand) `)` )? + ( `if` `(` $ifCond^ `)` )? attr-dict-with-keyword + }]; +} + //===----------------------------------------------------------------------===// // 2.14.4. Update Directive //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 515f5a9e28e8f..7ebba75389d31 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -149,6 +149,10 @@ static OptionalParseResult parserOptionalOperandAndTypeWithPrefix( return llvm::None; } +static bool isComputeOperation(Operation *op) { + return isa(op) || isa(op); +} + //===----------------------------------------------------------------------===// // ParallelOp //===----------------------------------------------------------------------===// @@ -655,12 +659,25 @@ static LogicalResult verify(acc::DataOp dataOp) { static LogicalResult verify(acc::InitOp initOp) { Operation *currOp = initOp; while ((currOp = currOp->getParentOp())) { - if (isa(currOp) || isa(currOp)) + if (isComputeOperation(currOp)) return initOp.emitOpError("cannot be nested in a compute operation"); } return success(); } +//===----------------------------------------------------------------------===// +// ShutdownOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(acc::ShutdownOp op) { + Operation *currOp = op; + while ((currOp = currOp->getParentOp())) { + if (isComputeOperation(currOp)) + return op.emitOpError("cannot be nested in a compute operation"); + } + return success(); +} + //===----------------------------------------------------------------------===// // UpdateOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 7a8a07f78f9a5..c56ccdb186f94 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -verify-diagnostics %s +// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -verify-diagnostics %s // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop gang { @@ -127,3 +127,29 @@ acc.loop { acc.init acc.yield } + +// ----- + +acc.parallel { +// expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}} + acc.shutdown + acc.yield +} + +// ----- + +acc.loop { +// expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}} + acc.shutdown + acc.yield +} + +// ----- + +acc.loop { + "some.op"() ({ + // expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}} + acc.shutdown + }) : () -> () + acc.yield +} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index a4dec5dcf480b..7ed4340fa3088 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -620,3 +620,31 @@ acc.init if(%ifCond) // CHECK: acc.init device_num([[I32VALUE]] : i32) // CHECK: acc.init device_num([[IDXVALUE]] : index) // CHECK: acc.init if([[IFCOND]]) + +// ----- + +%i64Value = constant 1 : i64 +%i32Value = constant 1 : i32 +%i32Value2 = constant 2 : i32 +%idxValue = constant 1 : index +%ifCond = constant true +acc.shutdown +acc.shutdown device_type(%i32Value : i32) +acc.shutdown device_type(%i32Value, %i32Value2 : i32, i32) +acc.shutdown device_num(%i64Value : i64) +acc.shutdown device_num(%i32Value : i32) +acc.shutdown device_num(%idxValue : index) +acc.shutdown if(%ifCond) + +// CHECK: [[I64VALUE:%.*]] = constant 1 : i64 +// CHECK: [[I32VALUE:%.*]] = constant 1 : i32 +// CHECK: [[I32VALUE2:%.*]] = constant 2 : i32 +// CHECK: [[IDXVALUE:%.*]] = constant 1 : index +// CHECK: [[IFCOND:%.*]] = constant true +// CHECK: acc.shutdown +// CHECK: acc.shutdown device_type([[I32VALUE]] : i32) +// CHECK: acc.shutdown device_type([[I32VALUE]], [[I32VALUE2]] : i32, i32) +// CHECK: acc.shutdown device_num([[I64VALUE]] : i64) +// CHECK: acc.shutdown device_num([[I32VALUE]] : i32) +// CHECK: acc.shutdown device_num([[IDXVALUE]] : index) +// CHECK: acc.shutdown if([[IFCOND]]) From 67aac915ba94a75cbdb3c9c5f6c8e9904829ce37 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Sun, 20 Sep 2020 18:08:27 +0900 Subject: [PATCH 054/544] [BuildLibCalls] Add noundef to the returned pointers of allocators and argument of free This patch adds noundef to the returned pointers of allocators (malloc, calloc, ...) and the pointer argument of free. The returned pointer of allocators cannot be poison or (partially) undef. Since the pointer that is given to free should precisely have zero offset, it cannot be poison or (partially) undef too. For the size arguments of allocators, noundef wasn't attached simply because I wasn't sure whether attaching it is okay or not. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D87984 --- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 24 ++++++++++++++++--- .../Transforms/InferFunctionAttrs/annotate.ll | 16 ++++++------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 64e168d48da91..2a0cdf6176109 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -105,14 +105,18 @@ static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { return true; } -static bool setRetAndArgsNoUndef(Function &F) { - bool Changed = false; +static bool setRetNoUndef(Function &F) { if (!F.getReturnType()->isVoidTy() && !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) { F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef); ++NumNoUndef; - Changed = true; + return true; } + return false; +} + +static bool setArgsNoUndef(Function &F) { + bool Changed = false; for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) { if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) { F.addParamAttr(ArgNo, Attribute::NoUndef); @@ -123,6 +127,10 @@ static bool setRetAndArgsNoUndef(Function &F) { return Changed; } +static bool setRetAndArgsNoUndef(Function &F) { + return setRetNoUndef(F) | setArgsNoUndef(F); +} + static bool setRetNonNull(Function &F) { assert(F.getReturnType()->isPointerTy() && "nonnull applies only to pointers"); @@ -318,6 +326,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_malloc: + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; @@ -383,10 +392,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_realloc: + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); return Changed; + case LibFunc_reallocf: + Changed |= setRetNoUndef(F); + return Changed; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); @@ -427,6 +440,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_aligned_alloc: + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; @@ -448,6 +462,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_calloc: + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; @@ -501,6 +516,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_free: + Changed |= setArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -723,6 +739,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_valloc: + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; @@ -891,6 +908,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_msvc_new_array_int: // new[](unsigned int) case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) // Operator new always returns a nonnull noalias pointer + Changed |= setRetNoUndef(F); Changed |= setRetNonNull(F); Changed |= setRetDoesNotAlias(F); return Changed; diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index d4b0f0fb679d8..c374e90cc075c 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -6,9 +6,9 @@ ; operator new routines declare i8* @_Znwj(i64 ) -; CHECK: declare noalias nonnull i8* @_Znwj(i64) [[G0:#[0-9]+]] +; CHECK: declare noalias noundef nonnull i8* @_Znwj(i64) [[G0:#[0-9]+]] declare i8* @_Znwm(i64) -; CHECK: declare noalias nonnull i8* @_Znwm(i64) [[G0]] +; CHECK: declare noalias noundef nonnull i8* @_Znwm(i64) [[G0]] declare i32 @__nvvm_reflect(i8*) ; CHECK-NVPTX: declare noundef i32 @__nvvm_reflect(i8* noundef) [[G0:#[0-9]+]] @@ -253,7 +253,7 @@ declare void @bcopy(i8*, i8*, i64) ; CHECK: declare void @bzero(i8* nocapture, i64) [[G1]] declare void @bzero(i8*, i64) -; CHECK: declare noalias i8* @calloc(i64, i64) [[G1]] +; CHECK: declare noalias noundef i8* @calloc(i64, i64) [[G1]] declare i8* @calloc(i64, i64) ; CHECK: declare double @cbrt(double) [[G0]] @@ -451,7 +451,7 @@ declare i32 @fputs(i8*, %opaque*) ; CHECK: declare noundef i64 @fread(i8* nocapture noundef, i64 noundef, i64 noundef, %opaque* nocapture noundef) [[G1]] declare i64 @fread(i8*, i64, i64, %opaque*) -; CHECK: declare void @free(i8* nocapture) [[G3:#[0-9]+]] +; CHECK: declare void @free(i8* nocapture noundef) [[G3:#[0-9]+]] declare void @free(i8*) ; CHECK: declare double @frexp(double, i32* nocapture) [[G1]] @@ -613,7 +613,7 @@ declare i32 @lstat(i8*, %opaque*) ; CHECK-LINUX: declare noundef i32 @lstat64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[G1]] declare i32 @lstat64(i8*, %opaque*) -; CHECK: declare noalias i8* @malloc(i64) [[G1]] +; CHECK: declare noalias noundef i8* @malloc(i64) [[G1]] declare i8* @malloc(i64) ; CHECK-LINUX: declare noalias i8* @memalign(i64, i64) [[G0]] @@ -726,10 +726,10 @@ declare i64 @read(i32, i8*, i64) ; CHECK: declare noundef i64 @readlink(i8* nocapture noundef readonly, i8* nocapture noundef, i64 noundef) [[G1]] declare i64 @readlink(i8*, i8*, i64) -; CHECK: declare noalias i8* @realloc(i8* nocapture, i64) [[G3]] +; CHECK: declare noalias noundef i8* @realloc(i8* nocapture, i64) [[G3]] declare i8* @realloc(i8*, i64) -; CHECK: declare i8* @reallocf(i8*, i64) +; CHECK: declare noundef i8* @reallocf(i8*, i64) declare i8* @reallocf(i8*, i64) ; CHECK: declare noundef i8* @realpath(i8* nocapture noundef readonly, i8* noundef) [[G1]] @@ -978,7 +978,7 @@ declare i32 @utime(i8*, %opaque*) ; CHECK: declare noundef i32 @utimes(i8* nocapture noundef readonly, %opaque* nocapture noundef readonly) [[G1]] declare i32 @utimes(i8*, %opaque*) -; CHECK: declare noalias i8* @valloc(i64) [[G1]] +; CHECK: declare noalias noundef i8* @valloc(i64) [[G1]] declare i8* @valloc(i64) ; CHECK: declare noundef i32 @vfprintf(%opaque* nocapture noundef, i8* nocapture noundef readonly, %opaque* noundef) [[G1]] From ae7ab962840a8d6def1af64e89082e55adf50e2c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 18:14:18 +0100 Subject: [PATCH 055/544] LanaiSubtarget.h - remove unnecessary includes. NFCI. TargetFrameLowering.h is guaranteed to be covered by LanaiFrameLowering.h --- llvm/lib/Target/Lanai/LanaiSubtarget.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/Lanai/LanaiSubtarget.h b/llvm/lib/Target/Lanai/LanaiSubtarget.h index f031653419a71..7955bfe0d8b9e 100644 --- a/llvm/lib/Target/Lanai/LanaiSubtarget.h +++ b/llvm/lib/Target/Lanai/LanaiSubtarget.h @@ -17,7 +17,6 @@ #include "LanaiISelLowering.h" #include "LanaiInstrInfo.h" #include "LanaiSelectionDAGInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" From 346199152fc1c70dd439b0adab8a5815e426b4c0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 18:14:43 +0100 Subject: [PATCH 056/544] LanaiTargetMachine.h - remove unnecessary includes. NFCI. --- llvm/lib/Target/Lanai/LanaiTargetMachine.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/llvm/lib/Target/Lanai/LanaiTargetMachine.h index fb2bc0644fe84..00922f44f33a6 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.h +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.h @@ -13,12 +13,10 @@ #ifndef LLVM_LIB_TARGET_LANAI_LANAITARGETMACHINE_H #define LLVM_LIB_TARGET_LANAI_LANAITARGETMACHINE_H -#include "LanaiFrameLowering.h" #include "LanaiISelLowering.h" #include "LanaiInstrInfo.h" #include "LanaiSelectionDAGInfo.h" #include "LanaiSubtarget.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { From 388b068956d4f169e868e7990d1cbc6066c3990c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 29 Sep 2020 13:20:28 -0400 Subject: [PATCH 057/544] [InstCombine] fix weird formatting in test file; NFC It apparently didn't cause trouble for the parser or FileCheck, but it was confusing to see a function def split by asserts. --- .../InstCombine/masked_intrinsics.ll | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 24bf6dd6c5229..2fbccd1bfe2d4 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -69,21 +69,20 @@ define double @load_all(double* %base, double %pt) { ret double %elt } -define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, +define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @load_generic( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - <2 x i1> %mask) { %ptv1 = insertelement <2 x double> undef, double %pt, i64 0 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) ret <2 x double> %res } -define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, +define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @load_speculative( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer @@ -91,21 +90,19 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]] ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; - double %pt, <2 x i1> %mask) { %ptv1 = insertelement <2 x double> undef, double %pt, i64 0 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) ret <2 x double> %res } -define <2 x double> @neg_load_spec_width(<2 x double>* dereferenceable(8) %ptr, +define <2 x double> @neg_load_spec_width(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @neg_load_spec_width( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - double %pt, <2 x i1> %mask) { %ptv1 = insertelement <2 x double> undef, double %pt, i64 0 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) @@ -113,14 +110,13 @@ define <2 x double> @neg_load_spec_width(<2 x double>* dereferenceable(8) %ptr, } ; Can't speculate since only half of required size is known deref -define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, +define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @load_spec_neg_size( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - double %pt, <2 x i1> %mask) { %ptv1 = insertelement <2 x double> undef, double %pt, i64 0 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2) @@ -128,7 +124,7 @@ define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, } ; Can only speculate one lane (but it's the only one active) -define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, +define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @load_spec_lan0( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer @@ -136,7 +132,6 @@ define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - double %pt, <2 x i1> %mask) { %ptv1 = insertelement <2 x double> undef, double %pt, i64 0 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1 @@ -173,12 +168,11 @@ define void @store_demandedelts(<2 x double>* %ptr, double %val) { ret void } -define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, +define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) { ; CHECK-LABEL: @gather_generic( ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - <2 x double> %passthru) { %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru) ret <2 x double> %res } @@ -217,7 +211,7 @@ define <4 x double> @gather_lane2(double* %base, double %pt) { ret <4 x double> %res } -define <2 x double> @gather_lane0_maybe(double* %base, double %pt, +define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @gather_lane0_maybe( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 @@ -226,7 +220,6 @@ define <2 x double> @gather_lane0_maybe(double* %base, double %pt, ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - <2 x i1> %mask) { %ptrs = getelementptr double, double *%base, <2 x i64> %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0 %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1 @@ -235,7 +228,7 @@ define <2 x double> @gather_lane0_maybe(double* %base, double %pt, ret <2 x double> %res } -define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, +define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @gather_lane0_maybe_spec( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 @@ -244,7 +237,6 @@ define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]]) ; CHECK-NEXT: ret <2 x double> [[RES]] ; - <2 x i1> %mask) { %ptrs = getelementptr double, double *%base, <2 x i64> %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0 %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1 From 259bb61c118bd7d15c3329ffb2daa9ceaea7302f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Sep 2020 10:25:16 -0700 Subject: [PATCH 058/544] [ELF] Fix multiple -mllvm after D70378 Fixes https://reviews.llvm.org/D70378#2299569 Multiple -mllvm is intended to be supported. We don't have a proper test for `-plugin-opt=-`. This patch adds the test as well. Differential Revision: https://reviews.llvm.org/D88461 --- lld/ELF/Driver.cpp | 3 ++- lld/test/ELF/lto/mllvm.ll | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/lto/mllvm.ll diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 8fee30c789ba4..5e80385837cec 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -895,7 +895,6 @@ static void parseClangOption(StringRef opt, const Twine &msg) { raw_string_ostream os(err); const char *argv[] = {config->progName.data(), opt.data()}; - cl::ResetAllOptionOccurrences(); if (cl::ParseCommandLineOptions(2, argv, "", &os)) return; os.flush(); @@ -1105,6 +1104,8 @@ static void readConfigs(opt::InputArgList &args) { error(errPrefix + toString(pat.takeError())); } + cl::ResetAllOptionOccurrences(); + // Parse LTO options. if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq)) parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())), diff --git a/lld/test/ELF/lto/mllvm.ll b/lld/test/ELF/lto/mllvm.ll new file mode 100644 index 0000000000000..883a9c8d8dc75 --- /dev/null +++ b/lld/test/ELF/lto/mllvm.ll @@ -0,0 +1,24 @@ +; REQUIRES: x86 +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld %t.o -o %t -mllvm -mcpu=znver1 -mllvm -debug-pass=Structure -mllvm -print-after-all 2>&1 | FileCheck %s +; RUN: llvm-objdump -d -j .text %t | FileCheck %s --check-prefix=DISASM + +;; We support -plugin-opt=- for LLVMgold.so compatibility. With a few exceptions, +;; most -plugin-opt=- prefixed options are passed through to cl::ParseCommandLineOptions. +; RUN: ld.lld %t.o -o %t -plugin-opt=-debug-pass=Structure -plugin-opt=-print-after-all 2>&1 | FileCheck %s + +; CHECK: Pass Arguments: +; CHECK: # *** IR Dump + +; DISASM: nopw + +target triple = "x86_64-unknown-linux-gnu" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @_start() #0 { +entry: + call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"() + ret void +} + +attributes #0 = { "frame-pointer"="all" } From 0cf48a70651c722a5dabf0ca8ca246b110d7c2ab Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Sep 2020 18:27:28 +0100 Subject: [PATCH 059/544] [InstCombine] visitTrunc - trunc (*shr (trunc A), C) --> trunc(*shr A, C) Attempt to fold trunc (*shr (trunc A), C) --> trunc(*shr A, C) iff the shift amount if small enough that all zero/sign bits created by the shift are removed by the last trunc. Helps fix the regressions encountered in D88316. I've tweaked a couple of shift values as suggested by @lebedev.ri to ensure we have coverage of shift values close (above/below) to the max limit. Differential Revision: https://reviews.llvm.org/D88429 --- .../InstCombine/InstCombineCasts.cpp | 21 ++++++++ .../InstCombine/trunc-shift-trunc.ll | 49 ++++++++----------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 1da6d0c2a92ae..609d3e2ac7ee4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -836,6 +836,27 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { // TODO: Mask high bits with 'and'. } + // trunc (*shr (trunc A), C) --> trunc(*shr A, C) + if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) { + unsigned MaxShiftAmt = SrcWidth - DestWidth; + + // If the shift is small enough, all zero/sign bits created by the shift are + // removed by the trunc. + // TODO: Support passing through undef shift amounts - these currently get + // zero'd by getIntegerCast. + if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE, + APInt(SrcWidth, MaxShiftAmt)))) { + auto *OldShift = cast(Src); + auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true); + bool IsExact = OldShift->isExact(); + Value *Shift = + OldShift->getOpcode() == Instruction::AShr + ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact) + : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact); + return CastInst::CreateTruncOrBitCast(Shift, DestTy); + } + } + if (Instruction *I = narrowBinOp(Trunc)) return I; diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index 34050250db739..7a4a9c1897270 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -9,9 +9,8 @@ declare void @use(i32) define i8 @trunc_lshr_trunc(i64 %a) { ; CHECK-LABEL: @trunc_lshr_trunc( -; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 8 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i8 +; CHECK-NEXT: [[C1:%.*]] = lshr i64 [[A:%.*]], 8 +; CHECK-NEXT: [[D:%.*]] = trunc i64 [[C1]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 @@ -22,9 +21,8 @@ define i8 @trunc_lshr_trunc(i64 %a) { define <2 x i8> @trunc_lshr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_uniform( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> @@ -35,9 +33,8 @@ define <2 x i8> @trunc_lshr_trunc_uniform(<2 x i64> %a) { define <2 x i8> @trunc_lshr_trunc_nonuniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_nonuniform( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> @@ -48,13 +45,12 @@ define <2 x i8> @trunc_lshr_trunc_nonuniform(<2 x i64> %a) { define <2 x i8> @trunc_lshr_trunc_uniform_undef(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_uniform_undef( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = lshr <2 x i32> %b, + %c = lshr <2 x i32> %b, %d = trunc <2 x i32> %c to <2 x i8> ret <2 x i8> %d } @@ -87,9 +83,8 @@ define <2 x i8> @trunc_lshr_trunc_nonuniform_outofrange(<2 x i64> %a) { define i8 @trunc_ashr_trunc(i64 %a) { ; CHECK-LABEL: @trunc_ashr_trunc( -; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[B]], 8 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 8 +; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 @@ -100,9 +95,8 @@ define i8 @trunc_ashr_trunc(i64 %a) { define i8 @trunc_ashr_trunc_exact(i64 %a) { ; CHECK-LABEL: @trunc_ashr_trunc_exact( -; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[B]], 8 -; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[A:%.*]], 8 +; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; %b = trunc i64 %a to i32 @@ -113,9 +107,8 @@ define i8 @trunc_ashr_trunc_exact(i64 %a) { define <2 x i8> @trunc_ashr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_uniform( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> @@ -126,22 +119,20 @@ define <2 x i8> @trunc_ashr_trunc_uniform(<2 x i64> %a) { define <2 x i8> @trunc_ashr_trunc_nonuniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_nonuniform( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; CHECK-NEXT: [[C1:%.*]] = ashr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = ashr <2 x i32> %b, + %c = ashr <2 x i32> %b, %d = trunc <2 x i32> %c to <2 x i8> ret <2 x i8> %d } define <2 x i8> @trunc_ashr_trunc_uniform_undef(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_uniform_undef( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> +; CHECK-NEXT: [[C1:%.*]] = ashr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> From 5409e4831fef7d2fa13fadd14ce53a85a99b1682 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 29 Sep 2020 13:29:34 -0400 Subject: [PATCH 060/544] [InstCombine] adjust duplicate test for masked load; NFC The test after the changed test was checking exactly the same dereferenceable bytes. --- .../Transforms/InstCombine/masked_intrinsics.ll | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 2fbccd1bfe2d4..a16f368ddb5cf 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -96,8 +96,8 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 ret <2 x double> %res } -define <2 x double> @neg_load_spec_width(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { -; CHECK-LABEL: @neg_load_spec_width( +define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) { +; CHECK-LABEL: @load_speculative_less_aligned( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) @@ -110,6 +110,7 @@ define <2 x double> @neg_load_spec_width(<2 x double>* dereferenceable(8) %ptr, } ; Can't speculate since only half of required size is known deref + define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) { ; CHECK-LABEL: @load_spec_neg_size( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 @@ -158,8 +159,8 @@ define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) { define void @store_demandedelts(<2 x double>* %ptr, double %val) { ; CHECK-LABEL: @store_demandedelts( -; CHECK-NEXT: [[VALVEC2:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC2]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> ) +; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> ) ; CHECK-NEXT: ret void ; %valvec1 = insertelement <2 x double> undef, double %val, i32 0 @@ -257,8 +258,8 @@ define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) { define void @scatter_demandedelts(double* %ptr, double %val) { ; CHECK-LABEL: @scatter_demandedelts( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> -; CHECK-NEXT: [[VALVEC2:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC2]], <2 x double*> [[PTRS]], i32 8, <2 x i1> ) +; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> ) ; CHECK-NEXT: ret void ; %ptrs = getelementptr double, double* %ptr, <2 x i64> From 3681be876fea9b270c7a1d2dc41679a399610e06 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Sep 2020 10:38:51 -0700 Subject: [PATCH 061/544] Add -fprofile-update={atomic,prefer-atomic,single} GCC 7 introduced -fprofile-update={atomic,prefer-atomic} (prefer-atomic is for best efforts (some targets do not support atomics)) to increment counters atomically, which is exactly what we have done with -fprofile-instr-generate (D50867) and -fprofile-arcs (b5ef137c11b1cc6ae839ee75b49233825772bdd0). This patch adds the option to clang to surface the internal options at driver level. GCC 7 also turned on -fprofile-update=prefer-atomic when -pthread is specified, but it has performance regression (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89307). So we don't follow suit. Differential Revision: https://reviews.llvm.org/D87737 --- clang/docs/UsersManual.rst | 11 +++++++++++ clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 3 +++ clang/lib/CodeGen/BackendUtil.cpp | 7 ++----- clang/lib/Driver/ToolChains/Clang.cpp | 11 +++++++++++ clang/lib/Frontend/CompilerInvocation.cpp | 1 + clang/test/CodeGen/code-coverage-tsan.c | 9 +++++---- clang/test/CodeGen/tsan-instrprof-atomic.c | 2 +- clang/test/Driver/fprofile-update.c | 15 +++++++++++++++ 9 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 clang/test/Driver/fprofile-update.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 2d0d71443dfda..ed6c9e3bc341a 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2172,6 +2172,17 @@ programs using the same instrumentation method as ``-fprofile-generate``. profile file, it reads from that file. If ``pathname`` is a directory name, it reads from ``pathname/default.profdata``. +.. option:: -fprofile-update[=] + + Unless ``-fsanitize=thread`` is specified, the default is ``single``, which + uses non-atomic increments. The counters can be inaccurate under thread + contention. ``atomic`` uses atomic increments which is accurate but has + overhead. ``prefer-atomic`` will be transformed to ``atomic`` when supported + by the target, or ``single`` otherwise. + + This option currently works with ``-fprofile-arcs`` and ``-fprofile-instr-generate``, + but not with ``-fprofile-generate``. + Disabling Instrumentation ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index a259218b29c6e..062a8c3fe64aa 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -185,6 +185,7 @@ CODEGENOPT(ObjCConvertMessagesToRuntimeCalls , 1, 1) VALUE_CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified. VALUE_CODEGENOPT(OptimizeSize, 2, 0) ///< If -Os (==1) or -Oz (==2) is specified. +CODEGENOPT(AtomicProfileUpdate , 1, 0) ///< Set -fprofile-update=atomic /// Choose profile instrumenation kind or no instrumentation. ENUM_CODEGENOPT(ProfileInstr, ProfileInstrKind, 2, ProfileNone) /// Choose profile kind for PGO use compilation. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a1f3d7a4316f6..09fdf50b1cb80 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -853,6 +853,9 @@ def fprofile_filter_files_EQ : Joined<["-"], "fprofile-filter-files=">, def fprofile_exclude_files_EQ : Joined<["-"], "fprofile-exclude-files=">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Instrument only functions from files where names don't match all the regexes separated by a semi-colon">; +def fprofile_update_EQ : Joined<["-"], "fprofile-update=">, + Group, Flags<[CC1Option, CoreOption]>, Values<"atomic,prefer-atomic,single">, + MetaVarName<"">, HelpText<"Set update method of profile counters (atomic,prefer-atomic,single)">; def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f83ec2479652e..d77590cc2adf3 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -570,7 +570,7 @@ static Optional getGCOVOptions(const CodeGenOptions &CodeGenOpts, Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.Filter = CodeGenOpts.ProfileFilterFiles; Options.Exclude = CodeGenOpts.ProfileExcludeFiles; - Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + Options.Atomic = CodeGenOpts.AtomicProfileUpdate; return Options; } @@ -582,10 +582,7 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts, InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; - - // TODO: Surface the option to emit atomic profile counter increments at - // the driver level. - Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + Options.Atomic = CodeGenOpts.AtomicProfileUpdate; return Options; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 12b3c8615e913..272a498990122 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -868,6 +868,17 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-filter-files=" + v))); } + if (const auto *A = Args.getLastArg(options::OPT_fprofile_update_EQ)) { + StringRef Val = A->getValue(); + if (Val == "atomic" || Val == "prefer-atomic") + CmdArgs.push_back("-fprofile-update=atomic"); + else if (Val != "single") + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Val; + } else if (TC.getSanitizerArgs().needsTsanRt()) { + CmdArgs.push_back("-fprofile-update=atomic"); + } + // Leave -fprofile-dir= an unused argument unless .gcda emission is // enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider // the flag used. There is no -fno-profile-dir, so the user has no diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 42224339250d6..b402f53cc765b 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -884,6 +884,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.DebugRangesBaseAddress = Args.hasArg(OPT_fdebug_ranges_base_address); setPGOInstrumentor(Opts, Args, Diags); + Opts.AtomicProfileUpdate = Args.hasArg(OPT_fprofile_update_EQ); Opts.InstrProfileOutput = std::string(Args.getLastArgValue(OPT_fprofile_instrument_path_EQ)); Opts.ProfileInstrumentUsePath = diff --git a/clang/test/CodeGen/code-coverage-tsan.c b/clang/test/CodeGen/code-coverage-tsan.c index 17f6596aa83df..47eabaa375e51 100644 --- a/clang/test/CodeGen/code-coverage-tsan.c +++ b/clang/test/CodeGen/code-coverage-tsan.c @@ -1,11 +1,12 @@ -/// -fsanitize=thread requires the (potentially concurrent) counter updates to be atomic. -// RUN: %clang_cc1 %s -triple x86_64 -emit-llvm -fsanitize=thread -femit-coverage-notes -femit-coverage-data \ +/// -fprofile-update=atomic (implied by -fsanitize=thread) requires the +/// (potentially concurrent) counter updates to be atomic. +// RUN: %clang_cc1 %s -triple x86_64 -emit-llvm -fprofile-update=atomic -femit-coverage-notes -femit-coverage-data \ // RUN: -coverage-notes-file /dev/null -coverage-data-file /dev/null -o - | FileCheck %s // CHECK-LABEL: void @foo() /// Two counters are incremented by __tsan_atomic64_fetch_add. -// CHECK: call i64 @__tsan_atomic64_fetch_add -// CHECK-NEXT: call i32 @__tsan_atomic32_fetch_sub +// CHECK: atomicrmw add i64* {{.*}} @__llvm_gcov_ctr +// CHECK-NEXT: atomicrmw sub i32* _Atomic(int) cnt; void foo() { cnt--; } diff --git a/clang/test/CodeGen/tsan-instrprof-atomic.c b/clang/test/CodeGen/tsan-instrprof-atomic.c index 9519cb7eb8ed6..48d39424e73c7 100644 --- a/clang/test/CodeGen/tsan-instrprof-atomic.c +++ b/clang/test/CodeGen/tsan-instrprof-atomic.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -fprofile-instrument=clang -fsanitize=thread -o - | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -fprofile-instrument=clang -fprofile-update=atomic -o - | FileCheck %s // CHECK: define {{.*}}@foo // CHECK-NOT: load {{.*}}foo diff --git a/clang/test/Driver/fprofile-update.c b/clang/test/Driver/fprofile-update.c new file mode 100644 index 0000000000000..befbcea03b876 --- /dev/null +++ b/clang/test/Driver/fprofile-update.c @@ -0,0 +1,15 @@ +/// For -fprofile-instr-generate and -fprofile-arcs, increment counters atomically +/// if -fprofile-update={atomic,prefer-atomic} or -fsanitize=thread is specified. +// RUN: %clang -### %s -c -target x86_64-linux -fsanitize=thread %s 2>&1 | FileCheck %s +// RUN: %clang -### %s -c -fprofile-update=atomic 2>&1 | FileCheck %s +// RUN: %clang -### %s -c -fprofile-update=prefer-atomic 2>&1 | FileCheck %s + +// CHECK: "-fprofile-update=atomic" + +// RUN: %clang -### %s -c -fprofile-update=atomic -fprofile-update=single 2>&1 | FileCheck %s --check-prefix=SINGLE + +// SINGLE-NOT: "-fprofile-update=atomic" + +// RUN: not %clang %s -c -fprofile-update=unknown 2>&1 | FileCheck %s --check-prefix=ERROR + +// ERROR: error: unsupported argument 'unknown' to option 'fprofile-update=' From 6d193ba3337e40be297c88ff8088d6cade3d5838 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 28 Sep 2020 16:41:28 -0700 Subject: [PATCH 062/544] [NFC][regalloc] Unit test for AllocationOrder iteration. Added unittests. In the process, separated core construction - which just needs the hits, order, and 'HardHints' values - from construction from current register allocation state, to simplify testing. Differential Revision: https://reviews.llvm.org/D88455 --- llvm/lib/CodeGen/AllocationOrder.cpp | 17 ++- llvm/lib/CodeGen/AllocationOrder.h | 22 ++-- llvm/lib/CodeGen/RegAllocBasic.cpp | 3 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 6 +- .../unittests/CodeGen/AllocationOrderTest.cpp | 114 ++++++++++++++++++ llvm/unittests/CodeGen/CMakeLists.txt | 1 + 6 files changed, 143 insertions(+), 20 deletions(-) create mode 100644 llvm/unittests/CodeGen/AllocationOrderTest.cpp diff --git a/llvm/lib/CodeGen/AllocationOrder.cpp b/llvm/lib/CodeGen/AllocationOrder.cpp index c99800659bfd8..2aef1234ac0ed 100644 --- a/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/llvm/lib/CodeGen/AllocationOrder.cpp @@ -26,17 +26,15 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Compare VirtRegMap::getRegAllocPref(). -AllocationOrder::AllocationOrder(unsigned VirtReg, - const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo, - const LiveRegMatrix *Matrix) - : Pos(0), HardHints(false) { +AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix) { const MachineFunction &MF = VRM.getMachineFunction(); const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); - Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); - if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix)) - HardHints = true; - rewind(); + auto Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); + SmallVector Hints; + bool HardHints = + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); LLVM_DEBUG({ if (!Hints.empty()) { @@ -51,4 +49,5 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, assert(is_contained(Order, Hints[I]) && "Target hint is outside allocation order."); #endif + return AllocationOrder(std::move(Hints), Order, HardHints); } diff --git a/llvm/lib/CodeGen/AllocationOrder.h b/llvm/lib/CodeGen/AllocationOrder.h index 75f87dd7d6544..368a3cd81d4c5 100644 --- a/llvm/lib/CodeGen/AllocationOrder.h +++ b/llvm/lib/CodeGen/AllocationOrder.h @@ -28,12 +28,12 @@ class VirtRegMap; class LiveRegMatrix; class LLVM_LIBRARY_VISIBILITY AllocationOrder { - SmallVector Hints; + const SmallVector Hints; ArrayRef Order; - int Pos; + int Pos = 0; // If HardHints is true, *only* Hints will be returned. - bool HardHints; + const bool HardHints; public: @@ -41,10 +41,16 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder { /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. /// @param RegClassInfo Information about reserved and allocatable registers. - AllocationOrder(unsigned VirtReg, - const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo, - const LiveRegMatrix *Matrix); + static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix); + + /// Create an AllocationOrder given the Hits, Order, and HardHits values. + /// Use the create method above - the ctor is for unittests. + AllocationOrder(SmallVector &&Hints, ArrayRef Order, + bool HardHints) + : Hints(std::move(Hints)), Order(Order), + Pos(-static_cast(this->Hints.size())), HardHints(HardHints) {} /// Get the allocation order without reordered hints. ArrayRef getOrder() const { return Order; } @@ -52,7 +58,7 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder { /// Return the next physical register in the allocation order, or 0. /// It is safe to call next() again after it returned 0, it will keep /// returning 0 until rewind() is called. - unsigned next(unsigned Limit = 0) { + MCPhysReg next(unsigned Limit = 0) { if (Pos < 0) return Hints.end()[Pos++]; if (HardHints) diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index a4ce9d70a270a..0fa50d97fb22a 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -259,7 +259,8 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVector PhysRegSpillCands; // Check for an available register in this class. - AllocationOrder Order(VirtReg.reg(), *VRM, RegClassInfo, Matrix); + auto Order = + AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); while (Register PhysReg = Order.next()) { // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index f2bd458da5879..eb0a096b9b4be 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -800,7 +800,8 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) { - AllocationOrder Order(VirtReg.reg(), *VRM, RegClassInfo, Matrix); + auto Order = + AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); Register PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) @@ -3013,7 +3014,8 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. - AllocationOrder Order(VirtReg.reg(), *VRM, RegClassInfo, Matrix); + auto Order = + AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { // If VirtReg got an assignment, the eviction info is no longre relevant. LastEvicted.clearEvicteeInfo(VirtReg.reg()); diff --git a/llvm/unittests/CodeGen/AllocationOrderTest.cpp b/llvm/unittests/CodeGen/AllocationOrderTest.cpp new file mode 100644 index 0000000000000..ba1a1e4f4c00c --- /dev/null +++ b/llvm/unittests/CodeGen/AllocationOrderTest.cpp @@ -0,0 +1,114 @@ +//===- llvm/unittest/CodeGen/AllocationOrderTest.cpp - AllocationOrder tests =// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../lib/CodeGen/AllocationOrder.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +std::vector loadOrder(AllocationOrder &O, unsigned Limit = 0) { + std::vector Ret; + O.rewind(); + while (auto R = O.next(Limit)) + Ret.push_back(R); + return Ret; +} +} // namespace + +TEST(AllocationOrderTest, Basic) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 5, 6, 7}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5, 6, 7}), loadOrder(O)); +} + +TEST(AllocationOrderTest, Duplicates) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 1, 5, 6}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5, 6}), loadOrder(O)); +} + +TEST(AllocationOrderTest, HardHints) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 5, 6, 7}; + AllocationOrder O(std::move(Hints), Order, true); + EXPECT_EQ((std::vector{1, 2, 3}), loadOrder(O)); +} + +TEST(AllocationOrderTest, LimitsBasic) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 5, 6, 7}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5, 6, 7}), loadOrder(O, 0)); + EXPECT_EQ((std::vector{1, 2, 3, 4}), loadOrder(O, 1)); +} + +TEST(AllocationOrderTest, LimitsDuplicates) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 1, 5, 6}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3, 4}), loadOrder(O, 1)); + EXPECT_EQ((std::vector{1, 2, 3, 4}), loadOrder(O, 2)); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5}), loadOrder(O, 3)); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5, 6}), loadOrder(O, 4)); +} + +TEST(AllocationOrderTest, LimitsHardHints) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 1, 5, 6}; + AllocationOrder O(std::move(Hints), Order, true); + EXPECT_EQ((std::vector{1, 2, 3}), loadOrder(O, 1)); +} + +TEST(AllocationOrderTest, DuplicateIsFirst) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {1, 4, 5, 6}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5, 6}), loadOrder(O)); +} + +TEST(AllocationOrderTest, DuplicateIsFirstWithLimits) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {1, 4, 5, 6}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3}), loadOrder(O, 1)); + EXPECT_EQ((std::vector{1, 2, 3, 4}), loadOrder(O, 2)); + EXPECT_EQ((std::vector{1, 2, 3, 4, 5}), loadOrder(O, 3)); +} + +TEST(AllocationOrderTest, NoHints) { + SmallVector Hints; + SmallVector Order = {1, 2, 3, 4}; + AllocationOrder O(std::move(Hints), Order, false); + EXPECT_EQ((std::vector{1, 2, 3, 4}), loadOrder(O)); + EXPECT_EQ((std::vector{1, 2}), loadOrder(O, 2)); + EXPECT_EQ((std::vector{1, 2, 3}), loadOrder(O, 3)); +} + +TEST(AllocationOrderTest, IsHintTest) { + SmallVector Hints = {1, 2, 3}; + SmallVector Order = {4, 1, 5, 6}; + AllocationOrder O(std::move(Hints), Order, false); + O.rewind(); + auto V = O.next(); + EXPECT_TRUE(O.isHint()); + EXPECT_EQ(V, 1U); + O.next(); + EXPECT_TRUE(O.isHint()); + O.next(); + EXPECT_TRUE(O.isHint()); + V = O.next(); + EXPECT_FALSE(O.isHint()); + EXPECT_EQ(V, 4U); + V = O.next(); + EXPECT_TRUE(O.isHint(1)); + EXPECT_FALSE(O.isHint()); + EXPECT_EQ(V, 5U); +} diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 817ddb1bbf26c..0e02144d1787f 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(CodeGenTests AArch64SelectionDAGTest.cpp + AllocationOrderTest.cpp AsmPrinterDwarfTest.cpp DIEHashTest.cpp DIETest.cpp From 543922cd3630ca3a1e06a6a946d148bc0e22e720 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Mon, 28 Sep 2020 09:08:09 -0700 Subject: [PATCH 063/544] Adds MLIR C-API for marshaling Python capsules. * Providing stable, C-accessible definitions for bridging MLIR Python<->C APIs, we eliminate inter-extension dependencies (i.e. they can all share a diamond dependency on the MLIR C-API). * Just provides accessors for context and module right now. * Needed in NPComp in ~a week or so for high level Torch APIs. Differential Revision: https://reviews.llvm.org/D88426 --- mlir/include/mlir-c/Bindings/Python/Interop.h | 93 +++++++++++++++++++ mlir/include/mlir-c/IR.h | 3 + mlir/lib/Bindings/Python/IRModules.cpp | 20 ++++ mlir/lib/Bindings/Python/IRModules.h | 14 +++ .../test/Bindings/Python/context_lifecycle.py | 7 ++ mlir/test/Bindings/Python/ir_module.py | 10 ++ 6 files changed, 147 insertions(+) create mode 100644 mlir/include/mlir-c/Bindings/Python/Interop.h diff --git a/mlir/include/mlir-c/Bindings/Python/Interop.h b/mlir/include/mlir-c/Bindings/Python/Interop.h new file mode 100644 index 0000000000000..24b2a8b9de397 --- /dev/null +++ b/mlir/include/mlir-c/Bindings/Python/Interop.h @@ -0,0 +1,93 @@ +/*===-- mlir-c/Interop.h - Constants for Python/C-API interop -----*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares constants and helpers necessary for C-level *| +|* interop with the MLIR Python extension module. Since the Python bindings *| +|* are a thin wrapper around the MLIR C-API, a further C-API is not provided *| +|* specifically for the Python extension. Instead, simple facilities are *| +|* provided for translating between Python types and corresponding MLIR C-API *| +|* types. *| +|* *| +|* This header is standalone, requiring nothing beyond normal linking against *| +|* the Python implementation. *| +\*===----------------------------------------------------------------------===*/ + +#ifndef MLIR_C_BINDINGS_PYTHON_INTEROP_H +#define MLIR_C_BINDINGS_PYTHON_INTEROP_H + +#include + +#include "mlir-c/IR.h" + +#define MLIR_PYTHON_CAPSULE_CONTEXT "mlir.ir.Context._CAPIPtr" +#define MLIR_PYTHON_CAPSULE_MODULE "mlir.ir.Module._CAPIPtr" + +/** Attribute on MLIR Python objects that expose their C-API pointer. + * This will be a type-specific capsule created as per one of the helpers + * below. + * + * Ownership is not transferred by acquiring a capsule in this way: the + * validity of the pointer wrapped by the capsule will be bounded by the + * lifetime of the Python object that produced it. Only the name and pointer + * of the capsule are set. The caller is free to set a destructor and context + * as needed to manage anything further. */ +#define MLIR_PYTHON_CAPI_PTR_ATTR "_CAPIPtr" + +/** Attribute on MLIR Python objects that exposes a factory function for + * constructing the corresponding Python object from a type-specific + * capsule wrapping the C-API pointer. The signature of the function is: + * def _CAPICreate(capsule) -> object + * Calling such a function implies a transfer of ownership of the object the + * capsule wraps: after such a call, the capsule should be considered invalid, + * and its wrapped pointer must not be destroyed. + * + * Only a very small number of Python objects can be created in such a fashion + * (i.e. top-level types such as Context where the lifetime can be cleanly + * delineated). */ +#define MLIR_PYTHON_CAPI_FACTORY_ATTR "_CAPICreate" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Creates a capsule object encapsulating the raw C-API MlirContext. + * The returned capsule does not extend or affect ownership of any Python + * objects that reference the context in any way. + */ +inline PyObject *mlirPythonContextToCapsule(MlirContext context) { + return PyCapsule_New(context.ptr, MLIR_PYTHON_CAPSULE_CONTEXT, NULL); +} + +/** Extracts a MlirContext from a capsule as produced from + * mlirPythonContextToCapsule. If the capsule is not of the right type, then + * a null context is returned (as checked via mlirContextIsNull). In such a + * case, the Python APIs will have already set an error. */ +inline MlirContext mlirPythonCapsuleToContext(PyObject *capsule) { + void *ptr = PyCapsule_GetPointer(capsule, MLIR_PYTHON_CAPSULE_CONTEXT); + MlirContext context = {ptr}; + return context; +} + +/** Creates a capsule object encapsulating the raw C-API MlirModule. + * The returned capsule does not extend or affect ownership of any Python + * objects that reference the module in any way. */ +inline PyObject *mlirPythonModuleToCapsule(MlirModule module) { +#ifdef __cplusplus + void *ptr = const_cast(module.ptr); +#else + void *ptr = (void *)ptr; +#endif + return PyCapsule_New(ptr, MLIR_PYTHON_CAPSULE_MODULE, NULL); +} + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_BINDINGS_PYTHON_INTEROP_H diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index 82149c7fce06b..c751da804097d 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -91,6 +91,9 @@ MlirContext mlirContextCreate(); /** Checks if two contexts are equal. */ int mlirContextEqual(MlirContext ctx1, MlirContext ctx2); +/** Checks whether a context is null. */ +inline int mlirContextIsNull(MlirContext context) { return !context.ptr; } + /** Takes an MLIR context owned by the caller and destroys it. */ void mlirContextDestroy(MlirContext context); diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index f3bd96856d090..8d64b2d8de0a5 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -9,6 +9,7 @@ #include "IRModules.h" #include "PybindUtils.h" +#include "mlir-c/Bindings/Python/Interop.h" #include "mlir-c/Registration.h" #include "mlir-c/StandardAttributes.h" #include "mlir-c/StandardTypes.h" @@ -453,6 +454,17 @@ PyMlirContext::~PyMlirContext() { mlirContextDestroy(context); } +py::object PyMlirContext::getCapsule() { + return py::reinterpret_steal(mlirPythonContextToCapsule(get())); +} + +py::object PyMlirContext::createFromCapsule(py::object capsule) { + MlirContext rawContext = mlirPythonCapsuleToContext(capsule.ptr()); + if (mlirContextIsNull(rawContext)) + throw py::error_already_set(); + return forContext(rawContext).releaseObject(); +} + PyMlirContext *PyMlirContext::createNewContextForInit() { MlirContext context = mlirContextCreate(); mlirRegisterAllDialects(context); @@ -581,6 +593,10 @@ PyModuleRef PyModule::create(PyMlirContextRef contextRef, MlirModule module) { return PyModuleRef(unownedModule, std::move(pyRef)); } +py::object PyModule::getCapsule() { + return py::reinterpret_steal(mlirPythonModuleToCapsule(get())); +} + //------------------------------------------------------------------------------ // PyOperation //------------------------------------------------------------------------------ @@ -1345,6 +1361,9 @@ void mlir::python::populateIRSubmodule(py::module &m) { return ref.releaseObject(); }) .def("_get_live_operation_count", &PyMlirContext::getLiveOperationCount) + .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, + &PyMlirContext::getCapsule) + .def(MLIR_PYTHON_CAPI_FACTORY_ATTR, &PyMlirContext::createFromCapsule) .def_property( "allow_unregistered_dialects", [](PyMlirContext &self) -> bool { @@ -1428,6 +1447,7 @@ void mlir::python::populateIRSubmodule(py::module &m) { // Mapping of Module py::class_(m, "Module") + .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, &PyModule::getCapsule) .def_property_readonly( "operation", [](PyModule &self) { diff --git a/mlir/lib/Bindings/Python/IRModules.h b/mlir/lib/Bindings/Python/IRModules.h index 41b18d2160265..e67142e56c00c 100644 --- a/mlir/lib/Bindings/Python/IRModules.h +++ b/mlir/lib/Bindings/Python/IRModules.h @@ -108,6 +108,14 @@ class PyMlirContext { return PyMlirContextRef(this, pybind11::cast(this)); } + /// Gets a capsule wrapping the void* within the MlirContext. + pybind11::object getCapsule(); + + /// Creates a PyMlirContext from the MlirContext wrapped by a capsule. + /// Note that PyMlirContext instances are uniqued, so the returned object + /// may be a pre-existing object. + static pybind11::object createFromCapsule(pybind11::object capsule); + /// Gets the count of live context objects. Used for testing. static size_t getLiveCount(); @@ -195,6 +203,12 @@ class PyModule : public BaseContextObject { pybind11::reinterpret_borrow(handle)); } + /// Gets a capsule wrapping the void* within the MlirModule. + /// Note that the module does not (yet) provide a corresponding factory for + /// constructing from a capsule as that would require uniquing PyModule + /// instances, which is not currently done. + pybind11::object getCapsule(); + private: PyModule(PyMlirContextRef contextRef, MlirModule module) : BaseContextObject(std::move(contextRef)), module(module) {} diff --git a/mlir/test/Bindings/Python/context_lifecycle.py b/mlir/test/Bindings/Python/context_lifecycle.py index e2b287061b223..460f41ccd4a72 100644 --- a/mlir/test/Bindings/Python/context_lifecycle.py +++ b/mlir/test/Bindings/Python/context_lifecycle.py @@ -40,3 +40,10 @@ c2 = None gc.collect() assert mlir.ir.Context._get_live_count() == 0 + +# Create a context, get its capsule and create from capsule. +c4 = mlir.ir.Context() +c4_capsule = c4._CAPIPtr +assert '"mlir.ir.Context._CAPIPtr"' in repr(c4_capsule) +c5 = mlir.ir.Context._CAPICreate(c4_capsule) +assert c4 is c5 diff --git a/mlir/test/Bindings/Python/ir_module.py b/mlir/test/Bindings/Python/ir_module.py index 614e1af8b8e76..d85a415308aed 100644 --- a/mlir/test/Bindings/Python/ir_module.py +++ b/mlir/test/Bindings/Python/ir_module.py @@ -84,3 +84,13 @@ def testModuleOperation(): assert ctx._get_live_operation_count() == 0 run(testModuleOperation) + + +# CHECK-LABEL: TEST: testModuleCapsule +def testModuleCapsule(): + ctx = mlir.ir.Context() + module = ctx.parse_module(r"""module @successfulParse {}""") + # CHECK: "mlir.ir.Module._CAPIPtr" + print(module._CAPIPtr) + +run(testModuleCapsule) From 4cda881e0d8b67d411b6a8daf55de53cf5d42ded Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Sep 2020 17:50:16 +0000 Subject: [PATCH 064/544] [gn build] Port 6d193ba3337 --- llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index fe5ee15605c0b..1b39b583e138d 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -17,6 +17,7 @@ unittest("CodeGenTests") { ] sources = [ "AArch64SelectionDAGTest.cpp", + "AllocationOrderTest.cpp", "AsmPrinterDwarfTest.cpp", "DIEHashTest.cpp", "DIETest.cpp", From 05a3b4fe30050b2b75c5eb339af52ba68cf01a7c Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 28 Sep 2020 22:47:00 -0700 Subject: [PATCH 065/544] [MLIR] Add Async dialect with trivial async.region operation Start Async dialect for modeling asynchronous execution. Reviewed By: mehdi_amini, herhut Differential Revision: https://reviews.llvm.org/D88459 --- .../include/mlir/Dialect/Async/CMakeLists.txt | 1 + mlir/include/mlir/Dialect/Async/IR/Async.h | 39 ++++++++++ .../mlir/Dialect/Async/IR/AsyncBase.td | 42 ++++++++++ .../include/mlir/Dialect/Async/IR/AsyncOps.td | 76 +++++++++++++++++++ .../mlir/Dialect/Async/IR/CMakeLists.txt | 2 + mlir/include/mlir/Dialect/CMakeLists.txt | 1 + mlir/include/mlir/InitAllDialects.h | 2 + mlir/lib/Dialect/Async/CMakeLists.txt | 1 + mlir/lib/Dialect/Async/IR/Async.cpp | 54 +++++++++++++ mlir/lib/Dialect/Async/IR/CMakeLists.txt | 13 ++++ mlir/lib/Dialect/CMakeLists.txt | 1 + mlir/test/Dialect/Async/ops.mlir | 16 ++++ 12 files changed, 248 insertions(+) create mode 100644 mlir/include/mlir/Dialect/Async/CMakeLists.txt create mode 100644 mlir/include/mlir/Dialect/Async/IR/Async.h create mode 100644 mlir/include/mlir/Dialect/Async/IR/AsyncBase.td create mode 100644 mlir/include/mlir/Dialect/Async/IR/AsyncOps.td create mode 100644 mlir/include/mlir/Dialect/Async/IR/CMakeLists.txt create mode 100644 mlir/lib/Dialect/Async/CMakeLists.txt create mode 100644 mlir/lib/Dialect/Async/IR/Async.cpp create mode 100644 mlir/lib/Dialect/Async/IR/CMakeLists.txt create mode 100644 mlir/test/Dialect/Async/ops.mlir diff --git a/mlir/include/mlir/Dialect/Async/CMakeLists.txt b/mlir/include/mlir/Dialect/Async/CMakeLists.txt new file mode 100644 index 0000000000000..f33061b2d87cf --- /dev/null +++ b/mlir/include/mlir/Dialect/Async/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(IR) diff --git a/mlir/include/mlir/Dialect/Async/IR/Async.h b/mlir/include/mlir/Dialect/Async/IR/Async.h new file mode 100644 index 0000000000000..f61d07b7d0dfd --- /dev/null +++ b/mlir/include/mlir/Dialect/Async/IR/Async.h @@ -0,0 +1,39 @@ +//===- Async.h - MLIR Async dialect -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the async dialect that is used for modeling asynchronous +// execution. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_ASYNC_IR_ASYNC_H +#define MLIR_DIALECT_ASYNC_IR_ASYNC_H + +#include "mlir/IR/Dialect.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" + +namespace mlir { +namespace async { + +/// The token type to represent asynchronous operation completion. +class TokenType : public Type::TypeBase { +public: + using Base::Base; +}; + +} // namespace async +} // namespace mlir + +#define GET_OP_CLASSES +#include "mlir/Dialect/Async/IR/AsyncOps.h.inc" + +#include "mlir/Dialect/Async/IR/AsyncOpsDialect.h.inc" + +#endif // MLIR_DIALECT_ASYNC_IR_ASYNC_H diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td b/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td new file mode 100644 index 0000000000000..ac67e9f1609d7 --- /dev/null +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td @@ -0,0 +1,42 @@ +//===- AsyncBase.td ----------------------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Base definitions for the `async` dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef ASYNC_BASE_TD +#define ASYNC_BASE_TD + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// Async dialect definitions +//===----------------------------------------------------------------------===// + +def AsyncDialect : Dialect { + let name = "async"; + + let summary = "Types and operations for async dialect"; + let description = [{ + This dialect contains operations for modeling asynchronous execution. + }]; + + let cppNamespace = "::mlir::async"; +} + +def Async_TokenType : DialectType()">, "token type">, + BuildableType<"$_builder.getType<::mlir::async::TokenType>()"> { + let typeDescription = [{ + `async.token` is a type returned by asynchronous operations, and it becomes + `ready` when the asynchronous operations that created it is completed. + }]; +} + +#endif // ASYNC_BASE_TD diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td new file mode 100644 index 0000000000000..b84f7c4028016 --- /dev/null +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td @@ -0,0 +1,76 @@ +//===- AsyncOps.td - Async operations definition -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the operation definition file for Async dialect operations. +// +//===----------------------------------------------------------------------===// + +#ifndef ASYNC_OPS +#define ASYNC_OPS + +include "mlir/Dialect/Async/IR/AsyncBase.td" +include "mlir/Interfaces/SideEffectInterfaces.td" + +//===----------------------------------------------------------------------===// +// Async op definitions +//===----------------------------------------------------------------------===// + +// Base class for the operation in this dialect +class Async_Op traits = []> : + Op; + +def Async_ExecuteOp : Async_Op<"execute"> { + let summary = "Asynchronous execute operation"; + let description = [{ + The `body` region attached to the `async.execute` operation semantically + can be executed concurrently with the successor operation. In the followup + example "compute0" can be executed concurrently with "compute1". + + The actual concurrency semantics depends on the dialect lowering to the + executable format. Fully sequential execution ("compute0" completes before + "compute1" starts) is a completely legal execution. + + Because concurrent execution is not guaranteed, it is illegal to create an + implicit dependency from "compute1" to "compute0" (e.g. via shared global + state). All dependencies must be made explicit with async execute arguments + (`async.token` or `async.value`). + + Example: + + ```mlir + %0 = async.execute { + "compute0"(...) + async.yield + } : !async.token + + %1 = "compute1"(...) + ``` + }]; + + // TODO: Take async.tokens/async.values as arguments. + let arguments = (ins ); + let results = (outs Async_TokenType:$done); + let regions = (region SizedRegion<1>:$body); + + let assemblyFormat = "$body attr-dict `:` type($done)"; +} + +def Async_YieldOp : + Async_Op<"yield", [HasParent<"ExecuteOp">, NoSideEffect, Terminator]> { + let summary = "terminator for Async execute operation"; + let description = [{ + The `async.yield` is a special terminator operation for the block inside + `async.execute` operation. + }]; + + let arguments = (ins Variadic:$operands); + + let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; +} + +#endif // ASYNC_OPS diff --git a/mlir/include/mlir/Dialect/Async/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Async/IR/CMakeLists.txt new file mode 100644 index 0000000000000..0aff86209a39b --- /dev/null +++ b/mlir/include/mlir/Dialect/Async/IR/CMakeLists.txt @@ -0,0 +1,2 @@ +add_mlir_dialect(AsyncOps async) +add_mlir_doc(AsyncOps -gen-dialect-doc AsyncDialect Dialects/) diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt index 6426fa8a91e74..103225948238f 100644 --- a/mlir/include/mlir/Dialect/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Affine) +add_subdirectory(Async) add_subdirectory(AVX512) add_subdirectory(GPU) add_subdirectory(Linalg) diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index 190486a6c0402..060acdec5a13d 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -16,6 +16,7 @@ #include "mlir/Dialect/AVX512/AVX512Dialect.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Async/IR/Async.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMAVX512Dialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -42,6 +43,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { // clang-format off registry.insert(); + addTypes(); +} + +/// Parse a type registered to this dialect. +Type AsyncDialect::parseType(DialectAsmParser &parser) const { + StringRef keyword; + if (parser.parseKeyword(&keyword)) + return Type(); + + if (keyword == "token") + return TokenType::get(getContext()); + + parser.emitError(parser.getNameLoc(), "unknown async type: ") << keyword; + return Type(); +} + +/// Print a type registered to this dialect. +void AsyncDialect::printType(Type type, DialectAsmPrinter &os) const { + TypeSwitch(type) + .Case([&](Type) { os << "token"; }) + .Default([](Type) { llvm_unreachable("unexpected 'async' type kind"); }); +} + +#define GET_OP_CLASSES +#include "mlir/Dialect/Async/IR/AsyncOps.cpp.inc" diff --git a/mlir/lib/Dialect/Async/IR/CMakeLists.txt b/mlir/lib/Dialect/Async/IR/CMakeLists.txt new file mode 100644 index 0000000000000..87946f715a0aa --- /dev/null +++ b/mlir/lib/Dialect/Async/IR/CMakeLists.txt @@ -0,0 +1,13 @@ +add_mlir_dialect_library(MLIRAsync + Async.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Async + + DEPENDS + MLIRAsyncOpsIncGen + + LINK_LIBS PUBLIC + MLIRDialect + MLIRIR + ) diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt index 790264fc47877..24ffb192338a5 100644 --- a/mlir/lib/Dialect/CMakeLists.txt +++ b/mlir/lib/Dialect/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Affine) +add_subdirectory(Async) add_subdirectory(AVX512) add_subdirectory(GPU) add_subdirectory(Linalg) diff --git a/mlir/test/Dialect/Async/ops.mlir b/mlir/test/Dialect/Async/ops.mlir new file mode 100644 index 0000000000000..2f5d0123e2157 --- /dev/null +++ b/mlir/test/Dialect/Async/ops.mlir @@ -0,0 +1,16 @@ +// RUN: mlir-opt %s | FileCheck %s + +// CHECK-LABEL: @identity +func @identity(%arg0 : !async.token) -> !async.token { + // CHECK: return %arg0 : !async.token + return %arg0 : !async.token +} + +// CHECK-LABEL: @empty_async_execute +func @empty_async_execute() -> !async.token { + %0 = async.execute { + async.yield + } : !async.token + + return %0 : !async.token +} From 962a247aebba39bc8f2d6aa901ed512f5c09dc72 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 29 Sep 2020 20:06:47 +0200 Subject: [PATCH 066/544] [clangd] Fix assertion in remote-index marshalling convert_to_slash is a no-op on posix style. --- .../clangd/index/remote/marshalling/Marshalling.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp index 839250982a03b..31ce4a44ea55a 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp @@ -296,8 +296,7 @@ llvm::Expected Marshaller::toProtobuf(const clangd::SymbolID &Subject, llvm::Expected Marshaller::relativePathToURI(llvm::StringRef RelativePath) { assert(LocalIndexRoot); - assert(RelativePath == llvm::sys::path::convert_to_slash( - RelativePath, llvm::sys::path::Style::posix)); + assert(RelativePath == llvm::sys::path::convert_to_slash(RelativePath)); if (RelativePath.empty()) return error("Empty relative path."); if (llvm::sys::path::is_absolute(RelativePath)) From 1b1d9815987a753f2f3524cfad050b85972dae5b Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Mon, 28 Sep 2020 10:28:29 -0700 Subject: [PATCH 067/544] Revert "Revert "Add the ability to write target stop-hooks using the ScriptInterpreter."" This reverts commit f775fe59640a2e837ad059a8f40e26989d4f9831. I fixed a return type error in the original patch that was causing a test failure. Also added a REQUIRES: python to the shell test so we'll skip this for people who build lldb w/o Python. Also added another test for the error printing. --- lldb/bindings/python/python-swigsafecast.swig | 7 + lldb/bindings/python/python-wrapper.swig | 121 +++++++ lldb/docs/use/python-reference.rst | 46 +++ .../lldb/Interpreter/ScriptInterpreter.h | 17 + lldb/include/lldb/Symbol/SymbolContext.h | 2 +- lldb/include/lldb/Target/Target.h | 86 ++++- lldb/source/Commands/CommandObjectTarget.cpp | 113 +++++- lldb/source/Commands/Options.td | 12 +- .../Python/ScriptInterpreterPython.cpp | 64 ++++ .../Python/ScriptInterpreterPythonImpl.h | 8 + lldb/source/Symbol/SymbolContext.cpp | 8 +- lldb/source/Target/Target.cpp | 338 +++++++++++++----- .../target/stop-hooks/TestStopHookScripted.py | 146 ++++++++ .../target/stop-hooks/TestStopHooks.py | 12 +- .../API/commands/target/stop-hooks/main.c | 3 +- .../commands/target/stop-hooks/stop_hook.py | 49 +++ lldb/test/Shell/Commands/Inputs/stop_hook.py | 10 + .../Commands/command-stop-hook-output.test | 19 + .../Python/PythonTestSuite.cpp | 14 + 19 files changed, 943 insertions(+), 132 deletions(-) create mode 100644 lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py create mode 100644 lldb/test/API/commands/target/stop-hooks/stop_hook.py create mode 100644 lldb/test/Shell/Commands/Inputs/stop_hook.py create mode 100644 lldb/test/Shell/Commands/command-stop-hook-output.test diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index d5cafbfa67cb2..091fc29b1057d 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -152,3 +152,10 @@ SBTypeToSWIGWrapper (lldb::SBSymbolContext* sym_ctx_sb) { return SWIG_NewPointerObj((void *) sym_ctx_sb, SWIGTYPE_p_lldb__SBSymbolContext, 0); } + +template <> +PyObject* +SBTypeToSWIGWrapper (lldb::SBStream* stream_sb) +{ + return SWIG_NewPointerObj((void *) stream_sb, SWIGTYPE_p_lldb__SBStream, 0); +} diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 516590ed57713..c00deba6073b4 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -468,6 +468,127 @@ LLDBSwigPythonCallBreakpointResolver return ret_val; } +SWIGEXPORT void * +LLDBSwigPythonCreateScriptedStopHook +( + lldb::TargetSP target_sp, + const char *python_class_name, + const char *session_dictionary_name, + lldb_private::StructuredDataImpl *args_impl, + Status &error +) +{ + if (python_class_name == NULL || python_class_name[0] == '\0') { + error.SetErrorString("Empty class name."); + Py_RETURN_NONE; + } + if (!session_dictionary_name) { + error.SetErrorString("No session dictionary"); + Py_RETURN_NONE; + } + + PyErr_Cleaner py_err_cleaner(true); + + auto dict = + PythonModule::MainModule().ResolveName( + session_dictionary_name); + auto pfunc = + PythonObject::ResolveNameWithDictionary( + python_class_name, dict); + + if (!pfunc.IsAllocated()) { + error.SetErrorStringWithFormat("Could not find class: %s.", + python_class_name); + return nullptr; + } + + lldb::SBTarget *target_val + = new lldb::SBTarget(target_sp); + + PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(target_val)); + + lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl); + PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(args_value)); + + PythonObject result = pfunc(target_arg, args_arg, dict); + + if (result.IsAllocated()) + { + // Check that the handle_stop callback is defined: + auto callback_func = result.ResolveName("handle_stop"); + if (callback_func.IsAllocated()) { + if (auto args_info = callback_func.GetArgInfo()) { + size_t num_args = (*args_info).max_positional_args; + if (num_args != 2) { + error.SetErrorStringWithFormat("Wrong number of args for " + "handle_stop callback, should be 2 (excluding self), got: %d", + num_args); + Py_RETURN_NONE; + } else + return result.release(); + } else { + error.SetErrorString("Couldn't get num arguments for handle_stop " + "callback."); + Py_RETURN_NONE; + } + return result.release(); + } + else { + error.SetErrorStringWithFormat("Class \"%s\" is missing the required " + "handle_stop callback.", + python_class_name); + result.release(); + } + } + Py_RETURN_NONE; +} + +SWIGEXPORT bool +LLDBSwigPythonStopHookCallHandleStop +( + void *implementor, + lldb::ExecutionContextRefSP exc_ctx_sp, + lldb::StreamSP stream +) +{ + // handle_stop will return a bool with the meaning "should_stop"... + // If you return nothing we'll assume we are going to stop. + // Also any errors should return true, since we should stop on error. + + PyErr_Cleaner py_err_cleaner(false); + PythonObject self(PyRefType::Borrowed, static_cast(implementor)); + auto pfunc = self.ResolveName("handle_stop"); + + if (!pfunc.IsAllocated()) + return true; + + PythonObject result; + lldb::SBExecutionContext sb_exc_ctx(exc_ctx_sp); + PythonObject exc_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_exc_ctx)); + lldb::SBStream sb_stream; + PythonObject sb_stream_arg(PyRefType::Owned, + SBTypeToSWIGWrapper(sb_stream)); + result = pfunc(exc_ctx_arg, sb_stream_arg); + + if (PyErr_Occurred()) + { + stream->PutCString("Python error occurred handling stop-hook."); + PyErr_Print(); + PyErr_Clear(); + return true; + } + + // Now add the result to the output stream. SBStream only + // makes an internally help StreamString which I can't interpose, so I + // have to copy it over here. + stream->PutCString(sb_stream.GetData()); + + if (result.get() == Py_False) + return false; + else + return true; +} + // wrapper that calls an optional instance member of an object taking no arguments static PyObject* LLDBSwigPython_CallOptionalMember diff --git a/lldb/docs/use/python-reference.rst b/lldb/docs/use/python-reference.rst index 8c76ef1a08307..60474c94f1850 100644 --- a/lldb/docs/use/python-reference.rst +++ b/lldb/docs/use/python-reference.rst @@ -819,3 +819,49 @@ When the program is stopped at the beginning of the 'read' function in libc, we frame #0: 0x00007fff06013ca0 libsystem_kernel.dylib`read (lldb) frame variable (int) fd = 3 + + Writing Target Stop-Hooks in Python: + ------------------------------------ + + Stop hooks fire whenever the process stops just before control is returned to the + user. Stop hooks can either be a set of lldb command-line commands, or can + be implemented by a suitably defined Python class. The Python based stop-hooks + can also be passed as set of -key -value pairs when they are added, and those + will get packaged up into a SBStructuredData Dictionary and passed to the + constructor of the Python object managing the stop hook. This allows for + parametrization of the stop hooks. + + To add a Python-based stop hook, first define a class with the following methods: + ++--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Name | Arguments | Description | ++--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| **__init__** | **target: lldb.SBTarget** | This is the constructor for the new stop-hook. | +| | **extra_args: lldb.SBStructuredData** | | +| | | | +| | | **target** is the SBTarget to which the stop hook is added. | +| | | | +| | | **extra_args** is an SBStructuredData object that the user can pass in when creating instances of this | +| | | breakpoint. It is not required, but allows for reuse of stop-hook classes. | ++--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| **handle_stop** | **exe_ctx: lldb.SBExecutionContext** | This is the called when the target stops. | +| | **stream: lldb.SBStream** | | +| | | **exe_ctx** argument will be filled with the current stop point for which the stop hook is | +| | | being evaluated. | +| | | | +| | | **stream** an lldb.SBStream, anything written to this stream will be written to the debugger console. | +| | | | +| | | The return value is a "Should Stop" vote from this thread. If the method returns either True or no return | +| | | this thread votes to stop. If it returns False, then the thread votes to continue after all the stop-hooks | +| | | are evaluated. | +| | | Note, the --auto-continue flag to 'target stop-hook add' overrides a True return value from the method. | ++--------------------+---------------------------------------+------------------------------------------------------------------------------------------------------------------+ + +To use this class in lldb, run the command: + +:: + + (lldb) command script import MyModule.py + (lldb) target stop-hook add -P MyModule.MyStopHook -k first -v 1 -k second -v 2 + +where MyModule.py is the file containing the class definition MyStopHook. diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 491923e6a6c4f..c38786fd50d42 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -298,6 +298,23 @@ class ScriptInterpreter : public PluginInterface { return lldb::eSearchDepthModule; } + virtual StructuredData::GenericSP + CreateScriptedStopHook(lldb::TargetSP target_sp, const char *class_name, + StructuredDataImpl *args_data, Status &error) { + error.SetErrorString("Creating scripted stop-hooks with the current " + "script interpreter is not supported."); + return StructuredData::GenericSP(); + } + + // This dispatches to the handle_stop method of the stop-hook class. It + // returns a "should_stop" bool. + virtual bool + ScriptedStopHookHandleStop(StructuredData::GenericSP implementor_sp, + ExecutionContext &exc_ctx, + lldb::StreamSP stream_sp) { + return true; + } + virtual StructuredData::ObjectSP LoadPluginModule(const FileSpec &file_spec, lldb_private::Status &error) { return StructuredData::ObjectSP(); diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h index cc49ce51c7139..0f99364596c27 100644 --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -340,7 +340,7 @@ class SymbolContextSpecifier { void Clear(); - bool SymbolContextMatches(SymbolContext &sc); + bool SymbolContextMatches(const SymbolContext &sc); bool AddressMatches(lldb::addr_t addr); diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 92904682ffb63..94c6ebeac10da 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -28,6 +28,7 @@ #include "lldb/Target/ExecutionContextScope.h" #include "lldb/Target/PathMappingList.h" #include "lldb/Target/SectionLoadHistory.h" +#include "lldb/Target/ThreadSpec.h" #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/Broadcaster.h" #include "lldb/Utility/LLDBAssert.h" @@ -508,6 +509,8 @@ class Target : public std::enable_shared_from_this, static void SetDefaultArchitecture(const ArchSpec &arch); + bool IsDummyTarget() const { return m_is_dummy_target; } + /// Find a binary on the system and return its Module, /// or return an existing Module that is already in the Target. /// @@ -1139,23 +1142,27 @@ class Target : public std::enable_shared_from_this, class StopHook : public UserID { public: StopHook(const StopHook &rhs); + virtual ~StopHook() = default; - ~StopHook(); - - StringList *GetCommandPointer() { return &m_commands; } - - const StringList &GetCommands() { return m_commands; } + enum class StopHookKind : uint32_t { CommandBased = 0, ScriptBased }; lldb::TargetSP &GetTarget() { return m_target_sp; } - void SetCommands(StringList &in_commands) { m_commands = in_commands; } - // Set the specifier. The stop hook will own the specifier, and is // responsible for deleting it when we're done. void SetSpecifier(SymbolContextSpecifier *specifier); SymbolContextSpecifier *GetSpecifier() { return m_specifier_sp.get(); } + bool ExecutionContextPasses(const ExecutionContext &exe_ctx); + + // Called on stop, this gets passed the ExecutionContext for each "stop + // with a reason" thread. It should add to the stream whatever text it + // wants to show the user, and return False to indicate it wants the target + // not to stop. + virtual bool HandleStop(ExecutionContext &exe_ctx, + lldb::StreamSP output) = 0; + // Set the Thread Specifier. The stop hook will own the thread specifier, // and is responsible for deleting it when we're done. void SetThreadSpecifier(ThreadSpec *specifier); @@ -1173,26 +1180,79 @@ class Target : public std::enable_shared_from_this, bool GetAutoContinue() const { return m_auto_continue; } void GetDescription(Stream *s, lldb::DescriptionLevel level) const; + virtual void GetSubclassDescription(Stream *s, + lldb::DescriptionLevel level) const = 0; - private: + protected: lldb::TargetSP m_target_sp; - StringList m_commands; lldb::SymbolContextSpecifierSP m_specifier_sp; std::unique_ptr m_thread_spec_up; bool m_active = true; bool m_auto_continue = false; + StopHook(lldb::TargetSP target_sp, lldb::user_id_t uid); + }; + + class StopHookCommandLine : public StopHook { + public: + virtual ~StopHookCommandLine() = default; + + StringList &GetCommands() { return m_commands; } + void SetActionFromString(const std::string &strings); + void SetActionFromStrings(const std::vector &strings); + + bool HandleStop(ExecutionContext &exc_ctx, + lldb::StreamSP output_sp) override; + void GetSubclassDescription(Stream *s, + lldb::DescriptionLevel level) const override; + + private: + StringList m_commands; // Use CreateStopHook to make a new empty stop hook. The GetCommandPointer // and fill it with commands, and SetSpecifier to set the specifier shared // pointer (can be null, that will match anything.) - StopHook(lldb::TargetSP target_sp, lldb::user_id_t uid); + StopHookCommandLine(lldb::TargetSP target_sp, lldb::user_id_t uid) + : StopHook(target_sp, uid) {} + friend class Target; + }; + + class StopHookScripted : public StopHook { + public: + virtual ~StopHookScripted() = default; + bool HandleStop(ExecutionContext &exc_ctx, lldb::StreamSP output) override; + + Status SetScriptCallback(std::string class_name, + StructuredData::ObjectSP extra_args_sp); + + void GetSubclassDescription(Stream *s, + lldb::DescriptionLevel level) const override; + + private: + std::string m_class_name; + /// This holds the dictionary of keys & values that can be used to + /// parametrize any given callback's behavior. + StructuredDataImpl *m_extra_args; // We own this structured data, + // but the SD itself manages the UP. + /// This holds the python callback object. + StructuredData::GenericSP m_implementation_sp; + + /// Use CreateStopHook to make a new empty stop hook. The GetCommandPointer + /// and fill it with commands, and SetSpecifier to set the specifier shared + /// pointer (can be null, that will match anything.) + StopHookScripted(lldb::TargetSP target_sp, lldb::user_id_t uid) + : StopHook(target_sp, uid) {} friend class Target; }; + typedef std::shared_ptr StopHookSP; - // Add an empty stop hook to the Target's stop hook list, and returns a - // shared pointer to it in new_hook. Returns the id of the new hook. - StopHookSP CreateStopHook(); + /// Add an empty stop hook to the Target's stop hook list, and returns a + /// shared pointer to it in new_hook. Returns the id of the new hook. + StopHookSP CreateStopHook(StopHook::StopHookKind kind); + + /// If you tried to create a stop hook, and that failed, call this to + /// remove the stop hook, as it will also reset the stop hook counter. + void UndoCreateStopHook(lldb::user_id_t uid); void RunStopHooks(); diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 431c2f3a19f00..98285289e3a98 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -24,6 +24,7 @@ #include "lldb/Interpreter/OptionGroupFile.h" #include "lldb/Interpreter/OptionGroupFormat.h" #include "lldb/Interpreter/OptionGroupPlatform.h" +#include "lldb/Interpreter/OptionGroupPythonClassWithDict.h" #include "lldb/Interpreter/OptionGroupString.h" #include "lldb/Interpreter/OptionGroupUInt64.h" #include "lldb/Interpreter/OptionGroupUUID.h" @@ -4442,10 +4443,10 @@ class CommandObjectTargetSymbols : public CommandObjectMultiword { class CommandObjectTargetStopHookAdd : public CommandObjectParsed, public IOHandlerDelegateMultiline { public: - class CommandOptions : public Options { + class CommandOptions : public OptionGroup { public: CommandOptions() - : Options(), m_line_start(0), m_line_end(UINT_MAX), + : OptionGroup(), m_line_start(0), m_line_end(UINT_MAX), m_func_name_type_mask(eFunctionNameTypeAuto), m_sym_ctx_specified(false), m_thread_specified(false), m_use_one_liner(false), m_one_liner() {} @@ -4459,7 +4460,8 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, ExecutionContext *execution_context) override { Status error; - const int short_option = m_getopt_table[option_idx].val; + const int short_option = + g_target_stop_hook_add_options[option_idx].short_option; switch (short_option) { case 'c': @@ -4589,20 +4591,75 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, // Instance variables to hold the values for one_liner options. bool m_use_one_liner; std::vector m_one_liner; + bool m_auto_continue; }; CommandObjectTargetStopHookAdd(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "target stop-hook add", - "Add a hook to be executed when the target stops.", + "Add a hook to be executed when the target stops." + "The hook can either be a list of commands or an " + "appropriately defined Python class. You can also " + "add filters so the hook only runs a certain stop " + "points.", "target stop-hook add"), IOHandlerDelegateMultiline("DONE", IOHandlerDelegate::Completion::LLDBCommand), - m_options() {} + m_options(), m_python_class_options("scripted stop-hook", true, 'P') { + SetHelpLong( + R"( +Command Based stop-hooks: +------------------------- + Stop hooks can run a list of lldb commands by providing one or more + --one-line-command options. The commands will get run in the order they are + added. Or you can provide no commands, in which case you will enter a + command editor where you can enter the commands to be run. + +Python Based Stop Hooks: +------------------------ + Stop hooks can be implemented with a suitably defined Python class, whose name + is passed in the --python-class option. + + When the stop hook is added, the class is initialized by calling: + + def __init__(self, target, extra_args, dict): + + target: The target that the stop hook is being added to. + extra_args: An SBStructuredData Dictionary filled with the -key -value + option pairs passed to the command. + dict: An implementation detail provided by lldb. + + Then when the stop-hook triggers, lldb will run the 'handle_stop' method. + The method has the signature: + + def handle_stop(self, exe_ctx, stream): + + exe_ctx: An SBExecutionContext for the thread that has stopped. + stream: An SBStream, anything written to this stream will be printed in the + the stop message when the process stops. + + Return Value: The method returns "should_stop". If should_stop is false + from all the stop hook executions on threads that stopped + with a reason, then the process will continue. Note that this + will happen only after all the stop hooks are run. + +Filter Options: +--------------- + Stop hooks can be set to always run, or to only run when the stopped thread + matches the filter options passed on the command line. The available filter + options include a shared library or a thread or queue specification, + a line range in a source file, a function name or a class name. + )"); + m_all_options.Append(&m_python_class_options, + LLDB_OPT_SET_1 | LLDB_OPT_SET_2, + LLDB_OPT_SET_FROM_TO(4, 6)); + m_all_options.Append(&m_options); + m_all_options.Finalize(); + } ~CommandObjectTargetStopHookAdd() override = default; - Options *GetOptions() override { return &m_options; } + Options *GetOptions() override { return &m_all_options; } protected: void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { @@ -4626,10 +4683,15 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, error_sp->Flush(); } Target *target = GetDebugger().GetSelectedTarget().get(); - if (target) - target->RemoveStopHookByID(m_stop_hook_sp->GetID()); + if (target) { + target->UndoCreateStopHook(m_stop_hook_sp->GetID()); + } } else { - m_stop_hook_sp->GetCommandPointer()->SplitIntoLines(line); + // The IOHandler editor is only for command lines stop hooks: + Target::StopHookCommandLine *hook_ptr = + static_cast(m_stop_hook_sp.get()); + + hook_ptr->SetActionFromString(line); StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); if (output_sp) { output_sp->Printf("Stop hook #%" PRIu64 " added.\n", @@ -4646,7 +4708,10 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, m_stop_hook_sp.reset(); Target &target = GetSelectedOrDummyTarget(); - Target::StopHookSP new_hook_sp = target.CreateStopHook(); + Target::StopHookSP new_hook_sp = + target.CreateStopHook(m_python_class_options.GetName().empty() ? + Target::StopHook::StopHookKind::CommandBased + : Target::StopHook::StopHookKind::ScriptBased); // First step, make the specifier. std::unique_ptr specifier_up; @@ -4715,11 +4780,30 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, new_hook_sp->SetAutoContinue(m_options.m_auto_continue); if (m_options.m_use_one_liner) { - // Use one-liners. - for (auto cmd : m_options.m_one_liner) - new_hook_sp->GetCommandPointer()->AppendString(cmd.c_str()); + // This is a command line stop hook: + Target::StopHookCommandLine *hook_ptr = + static_cast(new_hook_sp.get()); + hook_ptr->SetActionFromStrings(m_options.m_one_liner); result.AppendMessageWithFormat("Stop hook #%" PRIu64 " added.\n", new_hook_sp->GetID()); + } else if (!m_python_class_options.GetName().empty()) { + // This is a scripted stop hook: + Target::StopHookScripted *hook_ptr = + static_cast(new_hook_sp.get()); + Status error = hook_ptr->SetScriptCallback( + m_python_class_options.GetName(), + m_python_class_options.GetStructuredData()); + if (error.Success()) + result.AppendMessageWithFormat("Stop hook #%" PRIu64 " added.\n", + new_hook_sp->GetID()); + else { + // FIXME: Set the stop hook ID counter back. + result.AppendErrorWithFormat("Couldn't add stop hook: %s", + error.AsCString()); + result.SetStatus(eReturnStatusFailed); + target.UndoCreateStopHook(new_hook_sp->GetID()); + return false; + } } else { m_stop_hook_sp = new_hook_sp; m_interpreter.GetLLDBCommandsFromIOHandler("> ", // Prompt @@ -4732,6 +4816,9 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, private: CommandOptions m_options; + OptionGroupPythonClassWithDict m_python_class_options; + OptionGroupOptions m_all_options; + Target::StopHookSP m_stop_hook_sp; }; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 8c83fd20a366d..ad2f5fdae8e73 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -879,7 +879,7 @@ let Command = "target modules lookup" in { } let Command = "target stop hook add" in { - def target_stop_hook_add_one_liner : Option<"one-liner", "o">, + def target_stop_hook_add_one_liner : Option<"one-liner", "o">, GroupRange<1,3>, Arg<"OneLiner">, Desc<"Add a command for the stop hook. Can be specified " "more than once, and commands will be run in the order they appear.">; def target_stop_hook_add_shlib : Option<"shlib", "s">, Arg<"ShlibName">, @@ -897,19 +897,19 @@ let Command = "target stop hook add" in { def target_stop_hook_add_queue_name : Option<"queue-name", "q">, Arg<"QueueName">, Desc<"The stop hook is run only for threads in the queue " "whose name is given by this argument.">; - def target_stop_hook_add_file : Option<"file", "f">, Group<1>, + def target_stop_hook_add_file : Option<"file", "f">, Groups<[1,4]>, Arg<"Filename">, Desc<"Specify the source file within which the stop-hook " "is to be run.">, Completion<"SourceFile">; - def target_stop_hook_add_start_line : Option<"start-line", "l">, Group<1>, + def target_stop_hook_add_start_line : Option<"start-line", "l">, Groups<[1,4]>, Arg<"LineNum">, Desc<"Set the start of the line range for which the " "stop-hook is to be run.">; - def target_stop_hook_add_end_line : Option<"end-line", "e">, Group<1>, + def target_stop_hook_add_end_line : Option<"end-line", "e">, Groups<[1,4]>, Arg<"LineNum">, Desc<"Set the end of the line range for which the stop-hook" " is to be run.">; - def target_stop_hook_add_classname : Option<"classname", "c">, Group<2>, + def target_stop_hook_add_classname : Option<"classname", "c">, Groups<[2,5]>, Arg<"ClassName">, Desc<"Specify the class within which the stop-hook is to be run.">; - def target_stop_hook_add_name : Option<"name", "n">, Group<3>, + def target_stop_hook_add_name : Option<"name", "n">, Groups<[3,6]>, Arg<"FunctionName">, Desc<"Set the function name within which the stop hook" " will be run.">, Completion<"Symbol">; def target_stop_hook_add_auto_continue : Option<"auto-continue", "G">, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 9f56b4fa60a50..f67572c1f0299 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -127,6 +127,16 @@ extern "C" unsigned int LLDBSwigPythonCallBreakpointResolver(void *implementor, const char *method_name, lldb_private::SymbolContext *sym_ctx); +extern "C" void *LLDBSwigPythonCreateScriptedStopHook( + TargetSP target_sp, const char *python_class_name, + const char *session_dictionary_name, lldb_private::StructuredDataImpl *args, + lldb_private::Status &error); + +extern "C" bool +LLDBSwigPythonStopHookCallHandleStop(void *implementor, + lldb::ExecutionContextRefSP exc_ctx, + lldb::StreamSP stream); + extern "C" size_t LLDBSwigPython_CalculateNumChildren(void *implementor, uint32_t max); @@ -1979,6 +1989,60 @@ ScriptInterpreterPythonImpl::ScriptedBreakpointResolverSearchDepth( return lldb::eSearchDepthModule; } +StructuredData::GenericSP ScriptInterpreterPythonImpl::CreateScriptedStopHook( + TargetSP target_sp, const char *class_name, StructuredDataImpl *args_data, + Status &error) { + + if (!target_sp) { + error.SetErrorString("No target for scripted stop-hook."); + return StructuredData::GenericSP(); + } + + if (class_name == nullptr || class_name[0] == '\0') { + error.SetErrorString("No class name for scripted stop-hook."); + return StructuredData::GenericSP(); + } + + ScriptInterpreter *script_interpreter = m_debugger.GetScriptInterpreter(); + ScriptInterpreterPythonImpl *python_interpreter = + static_cast(script_interpreter); + + if (!script_interpreter) { + error.SetErrorString("No script interpreter for scripted stop-hook."); + return StructuredData::GenericSP(); + } + + void *ret_val; + + { + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + + ret_val = LLDBSwigPythonCreateScriptedStopHook( + target_sp, class_name, python_interpreter->m_dictionary_name.c_str(), + args_data, error); + } + + return StructuredData::GenericSP(new StructuredPythonObject(ret_val)); +} + +bool ScriptInterpreterPythonImpl::ScriptedStopHookHandleStop( + StructuredData::GenericSP implementor_sp, ExecutionContext &exc_ctx, + lldb::StreamSP stream_sp) { + assert(implementor_sp && + "can't call a stop hook with an invalid implementor"); + assert(stream_sp && "can't call a stop hook with an invalid stream"); + + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + + lldb::ExecutionContextRefSP exc_ctx_ref_sp(new ExecutionContextRef(exc_ctx)); + + bool ret_val = LLDBSwigPythonStopHookCallHandleStop( + implementor_sp->GetValue(), exc_ctx_ref_sp, stream_sp); + return ret_val; +} + StructuredData::ObjectSP ScriptInterpreterPythonImpl::LoadPluginModule(const FileSpec &file_spec, lldb_private::Status &error) { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index 22b2c8152eac0..f89c3d461f7fd 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -105,6 +105,14 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { lldb::SearchDepth ScriptedBreakpointResolverSearchDepth( StructuredData::GenericSP implementor_sp) override; + StructuredData::GenericSP + CreateScriptedStopHook(lldb::TargetSP target_sp, const char *class_name, + StructuredDataImpl *args_data, Status &error) override; + + bool ScriptedStopHookHandleStop(StructuredData::GenericSP implementor_sp, + ExecutionContext &exc_ctx, + lldb::StreamSP stream_sp) override; + StructuredData::GenericSP CreateFrameRecognizer(const char *class_name) override; diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 51f56704cca66..f20dc61996e0b 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -1010,11 +1010,15 @@ void SymbolContextSpecifier::Clear() { m_type = eNothingSpecified; } -bool SymbolContextSpecifier::SymbolContextMatches(SymbolContext &sc) { +bool SymbolContextSpecifier::SymbolContextMatches(const SymbolContext &sc) { if (m_type == eNothingSpecified) return true; - if (m_target_sp.get() != sc.target_sp.get()) + // Only compare targets if this specifier has one and it's not the Dummy + // target. Otherwise if a specifier gets made in the dummy target and + // copied over we'll artificially fail the comparision. + if (m_target_sp && !m_target_sp->IsDummyTarget() && + m_target_sp != sc.target_sp) return false; if (m_type & eModuleSpecified) { diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index a529df998ba7a..a5250ddcef741 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -2484,13 +2484,28 @@ ClangModulesDeclVendor *Target::GetClangModulesDeclVendor() { return m_clang_modules_decl_vendor_up.get(); } -Target::StopHookSP Target::CreateStopHook() { +Target::StopHookSP Target::CreateStopHook(StopHook::StopHookKind kind) { lldb::user_id_t new_uid = ++m_stop_hook_next_id; - Target::StopHookSP stop_hook_sp(new StopHook(shared_from_this(), new_uid)); + Target::StopHookSP stop_hook_sp; + switch (kind) { + case StopHook::StopHookKind::CommandBased: + stop_hook_sp.reset(new StopHookCommandLine(shared_from_this(), new_uid)); + break; + case StopHook::StopHookKind::ScriptBased: + stop_hook_sp.reset(new StopHookScripted(shared_from_this(), new_uid)); + break; + } m_stop_hooks[new_uid] = stop_hook_sp; return stop_hook_sp; } +void Target::UndoCreateStopHook(lldb::user_id_t user_id) { + if (!RemoveStopHookByID(user_id)) + return; + if (user_id == m_stop_hook_next_id) + m_stop_hook_next_id--; +} + bool Target::RemoveStopHookByID(lldb::user_id_t user_id) { size_t num_removed = m_stop_hooks.erase(user_id); return (num_removed != 0); @@ -2546,25 +2561,18 @@ void Target::RunStopHooks() { if (m_stop_hooks.empty()) return; - StopHookCollection::iterator pos, end = m_stop_hooks.end(); - // If there aren't any active stop hooks, don't bother either. - // Also see if any of the active hooks want to auto-continue. bool any_active_hooks = false; - bool auto_continue = false; for (auto hook : m_stop_hooks) { if (hook.second->IsActive()) { any_active_hooks = true; - auto_continue |= hook.second->GetAutoContinue(); + break; } } if (!any_active_hooks) return; - CommandReturnObject result(m_debugger.GetUseColor()); - std::vector exc_ctx_with_reasons; - std::vector sym_ctx_with_reasons; ThreadList &cur_threadlist = m_process_sp->GetThreadList(); size_t num_threads = cur_threadlist.GetSize(); @@ -2572,10 +2580,8 @@ void Target::RunStopHooks() { lldb::ThreadSP cur_thread_sp = cur_threadlist.GetThreadAtIndex(i); if (cur_thread_sp->ThreadStoppedForAReason()) { lldb::StackFrameSP cur_frame_sp = cur_thread_sp->GetStackFrameAtIndex(0); - exc_ctx_with_reasons.push_back(ExecutionContext( - m_process_sp.get(), cur_thread_sp.get(), cur_frame_sp.get())); - sym_ctx_with_reasons.push_back( - cur_frame_sp->GetSymbolContext(eSymbolContextEverything)); + exc_ctx_with_reasons.emplace_back(m_process_sp.get(), cur_thread_sp.get(), + cur_frame_sp.get()); } } @@ -2584,91 +2590,86 @@ void Target::RunStopHooks() { if (num_exe_ctx == 0) return; - result.SetImmediateOutputStream(m_debugger.GetAsyncOutputStream()); - result.SetImmediateErrorStream(m_debugger.GetAsyncErrorStream()); + StreamSP output_sp = m_debugger.GetAsyncOutputStream(); - bool keep_going = true; + bool auto_continue = false; bool hooks_ran = false; bool print_hook_header = (m_stop_hooks.size() != 1); bool print_thread_header = (num_exe_ctx != 1); - bool did_restart = false; + bool should_stop = false; + bool somebody_restarted = false; - for (pos = m_stop_hooks.begin(); keep_going && pos != end; pos++) { - // result.Clear(); - StopHookSP cur_hook_sp = (*pos).second; + for (auto stop_entry : m_stop_hooks) { + StopHookSP cur_hook_sp = stop_entry.second; if (!cur_hook_sp->IsActive()) continue; bool any_thread_matched = false; - for (size_t i = 0; keep_going && i < num_exe_ctx; i++) { - if ((cur_hook_sp->GetSpecifier() == nullptr || - cur_hook_sp->GetSpecifier()->SymbolContextMatches( - sym_ctx_with_reasons[i])) && - (cur_hook_sp->GetThreadSpecifier() == nullptr || - cur_hook_sp->GetThreadSpecifier()->ThreadPassesBasicTests( - exc_ctx_with_reasons[i].GetThreadRef()))) { - if (!hooks_ran) { - hooks_ran = true; - } - if (print_hook_header && !any_thread_matched) { - const char *cmd = - (cur_hook_sp->GetCommands().GetSize() == 1 - ? cur_hook_sp->GetCommands().GetStringAtIndex(0) - : nullptr); - if (cmd) - result.AppendMessageWithFormat("\n- Hook %" PRIu64 " (%s)\n", - cur_hook_sp->GetID(), cmd); - else - result.AppendMessageWithFormat("\n- Hook %" PRIu64 "\n", - cur_hook_sp->GetID()); - any_thread_matched = true; - } + for (auto exc_ctx : exc_ctx_with_reasons) { + // We detect somebody restarted in the stop-hook loop, and broke out of + // that loop back to here. So break out of here too. + if (somebody_restarted) + break; - if (print_thread_header) - result.AppendMessageWithFormat( - "-- Thread %d\n", - exc_ctx_with_reasons[i].GetThreadPtr()->GetIndexID()); - - CommandInterpreterRunOptions options; - options.SetStopOnContinue(true); - options.SetStopOnError(true); - options.SetEchoCommands(false); - options.SetPrintResults(true); - options.SetPrintErrors(true); - options.SetAddToHistory(false); - - // Force Async: - bool old_async = GetDebugger().GetAsyncExecution(); - GetDebugger().SetAsyncExecution(true); - GetDebugger().GetCommandInterpreter().HandleCommands( - cur_hook_sp->GetCommands(), &exc_ctx_with_reasons[i], options, - result); - GetDebugger().SetAsyncExecution(old_async); - // If the command started the target going again, we should bag out of - // running the stop hooks. - if ((result.GetStatus() == eReturnStatusSuccessContinuingNoResult) || - (result.GetStatus() == eReturnStatusSuccessContinuingResult)) { - // But only complain if there were more stop hooks to do: - StopHookCollection::iterator tmp = pos; - if (++tmp != end) - result.AppendMessageWithFormat( - "\nAborting stop hooks, hook %" PRIu64 - " set the program running.\n" - " Consider using '-G true' to make " - "stop hooks auto-continue.\n", - cur_hook_sp->GetID()); - keep_going = false; - did_restart = true; - } + if (!cur_hook_sp->ExecutionContextPasses(exc_ctx)) + continue; + + // We only consult the auto-continue for a stop hook if it matched the + // specifier. + auto_continue |= cur_hook_sp->GetAutoContinue(); + + if (!hooks_ran) + hooks_ran = true; + + if (print_hook_header && !any_thread_matched) { + StreamString s; + cur_hook_sp->GetDescription(&s, eDescriptionLevelBrief); + if (s.GetSize() != 0) + output_sp->Printf("\n- Hook %" PRIu64 " (%s)\n", cur_hook_sp->GetID(), + s.GetData()); + else + output_sp->Printf("\n- Hook %" PRIu64 "\n", cur_hook_sp->GetID()); + any_thread_matched = true; + } + + if (print_thread_header) + output_sp->Printf("-- Thread %d\n", + exc_ctx.GetThreadPtr()->GetIndexID()); + + bool this_should_stop = cur_hook_sp->HandleStop(exc_ctx, output_sp); + // If this hook is set to auto-continue that should override the + // HandleStop result... + if (cur_hook_sp->GetAutoContinue()) + this_should_stop = false; + + // If anybody wanted to stop, we should all stop. + if (!should_stop) + should_stop = this_should_stop; + + // We don't have a good way to prohibit people from restarting the target + // willy nilly in a stop hook. So see if the private state is running + // here and bag out if it is. + // FIXME: when we are doing non-stop mode for realz we'll have to instead + // track each thread, and only bag out if a thread is set running. + if (m_process_sp->GetPrivateState() != eStateStopped) { + output_sp->Printf("\nAborting stop hooks, hook %" PRIu64 + " set the program running.\n" + " Consider using '-G true' to make " + "stop hooks auto-continue.\n", + cur_hook_sp->GetID()); + somebody_restarted = true; + break; } } } + + output_sp->Flush(); + // Finally, if auto-continue was requested, do it now: - if (!did_restart && auto_continue) + // We only compute should_stop against the hook results if a hook got to run + // which is why we have to do this conjoint test. + if (!somebody_restarted && ((hooks_ran && !should_stop) || auto_continue)) m_process_sp->PrivateResume(); - - result.GetImmediateOutputStream()->Flush(); - result.GetImmediateErrorStream()->Flush(); } const TargetPropertiesSP &Target::GetGlobalProperties() { @@ -3128,20 +3129,17 @@ void Target::FinalizeFileActions(ProcessLaunchInfo &info) { // Target::StopHook Target::StopHook::StopHook(lldb::TargetSP target_sp, lldb::user_id_t uid) - : UserID(uid), m_target_sp(target_sp), m_commands(), m_specifier_sp(), + : UserID(uid), m_target_sp(target_sp), m_specifier_sp(), m_thread_spec_up() {} Target::StopHook::StopHook(const StopHook &rhs) : UserID(rhs.GetID()), m_target_sp(rhs.m_target_sp), - m_commands(rhs.m_commands), m_specifier_sp(rhs.m_specifier_sp), - m_thread_spec_up(), m_active(rhs.m_active), - m_auto_continue(rhs.m_auto_continue) { + m_specifier_sp(rhs.m_specifier_sp), m_thread_spec_up(), + m_active(rhs.m_active), m_auto_continue(rhs.m_auto_continue) { if (rhs.m_thread_spec_up) m_thread_spec_up = std::make_unique(*rhs.m_thread_spec_up); } -Target::StopHook::~StopHook() = default; - void Target::StopHook::SetSpecifier(SymbolContextSpecifier *specifier) { m_specifier_sp.reset(specifier); } @@ -3150,8 +3148,31 @@ void Target::StopHook::SetThreadSpecifier(ThreadSpec *specifier) { m_thread_spec_up.reset(specifier); } +bool Target::StopHook::ExecutionContextPasses(const ExecutionContext &exc_ctx) { + SymbolContextSpecifier *specifier = GetSpecifier(); + if (!specifier) + return true; + + bool will_run = true; + if (exc_ctx.GetFramePtr()) + will_run = GetSpecifier()->SymbolContextMatches( + exc_ctx.GetFramePtr()->GetSymbolContext(eSymbolContextEverything)); + if (will_run && GetThreadSpecifier() != nullptr) + will_run = + GetThreadSpecifier()->ThreadPassesBasicTests(exc_ctx.GetThreadRef()); + + return will_run; +} + void Target::StopHook::GetDescription(Stream *s, lldb::DescriptionLevel level) const { + + // For brief descriptions, only print the subclass description: + if (level == eDescriptionLevelBrief) { + GetSubclassDescription(s, level); + return; + } + unsigned indent_level = s->GetIndentLevel(); s->SetIndentLevel(indent_level + 2); @@ -3182,15 +3203,148 @@ void Target::StopHook::GetDescription(Stream *s, s->PutCString("\n"); s->SetIndentLevel(indent_level + 2); } + GetSubclassDescription(s, level); +} +void Target::StopHookCommandLine::GetSubclassDescription( + Stream *s, lldb::DescriptionLevel level) const { + // The brief description just prints the first command. + if (level == eDescriptionLevelBrief) { + if (m_commands.GetSize() == 1) + s->PutCString(m_commands.GetStringAtIndex(0)); + return; + } s->Indent("Commands: \n"); - s->SetIndentLevel(indent_level + 4); + s->SetIndentLevel(s->GetIndentLevel() + 4); uint32_t num_commands = m_commands.GetSize(); for (uint32_t i = 0; i < num_commands; i++) { s->Indent(m_commands.GetStringAtIndex(i)); s->PutCString("\n"); } - s->SetIndentLevel(indent_level); + s->SetIndentLevel(s->GetIndentLevel() - 4); +} + +// Target::StopHookCommandLine +void Target::StopHookCommandLine::SetActionFromString(const std::string &string) { + GetCommands().SplitIntoLines(string); +} + +void Target::StopHookCommandLine::SetActionFromStrings( + const std::vector &strings) { + for (auto string : strings) + GetCommands().AppendString(string.c_str()); +} + +bool Target::StopHookCommandLine::HandleStop(ExecutionContext &exc_ctx, + StreamSP output_sp) { + assert(exc_ctx.GetTargetPtr() && "Can't call PerformAction on a context " + "with no target"); + + if (!m_commands.GetSize()) + return true; + + CommandReturnObject result(false); + result.SetImmediateOutputStream(output_sp); + Debugger &debugger = exc_ctx.GetTargetPtr()->GetDebugger(); + CommandInterpreterRunOptions options; + options.SetStopOnContinue(true); + options.SetStopOnError(true); + options.SetEchoCommands(false); + options.SetPrintResults(true); + options.SetPrintErrors(true); + options.SetAddToHistory(false); + + // Force Async: + bool old_async = debugger.GetAsyncExecution(); + debugger.SetAsyncExecution(true); + debugger.GetCommandInterpreter().HandleCommands(GetCommands(), &exc_ctx, + options, result); + debugger.SetAsyncExecution(old_async); + + return true; +} + +// Target::StopHookScripted +Status Target::StopHookScripted::SetScriptCallback( + std::string class_name, StructuredData::ObjectSP extra_args_sp) { + Status error; + + ScriptInterpreter *script_interp = + GetTarget()->GetDebugger().GetScriptInterpreter(); + if (!script_interp) { + error.SetErrorString("No script interpreter installed."); + return error; + } + + m_class_name = class_name; + + m_extra_args = new StructuredDataImpl(); + + if (extra_args_sp) + m_extra_args->SetObjectSP(extra_args_sp); + + m_implementation_sp = script_interp->CreateScriptedStopHook( + GetTarget(), m_class_name.c_str(), m_extra_args, error); + + return error; +} + +bool Target::StopHookScripted::HandleStop(ExecutionContext &exc_ctx, + StreamSP output_sp) { + assert(exc_ctx.GetTargetPtr() && "Can't call HandleStop on a context " + "with no target"); + + ScriptInterpreter *script_interp = + GetTarget()->GetDebugger().GetScriptInterpreter(); + if (!script_interp) + return true; + + bool should_stop = script_interp->ScriptedStopHookHandleStop( + m_implementation_sp, exc_ctx, output_sp); + + return should_stop; +} + +void Target::StopHookScripted::GetSubclassDescription( + Stream *s, lldb::DescriptionLevel level) const { + if (level == eDescriptionLevelBrief) { + s->PutCString(m_class_name); + return; + } + s->Indent("Class:"); + s->Printf("%s\n", m_class_name.c_str()); + + // Now print the extra args: + // FIXME: We should use StructuredData.GetDescription on the m_extra_args + // but that seems to rely on some printing plugin that doesn't exist. + if (!m_extra_args->IsValid()) + return; + StructuredData::ObjectSP object_sp = m_extra_args->GetObjectSP(); + if (!object_sp || !object_sp->IsValid()) + return; + + StructuredData::Dictionary *as_dict = object_sp->GetAsDictionary(); + if (!as_dict || !as_dict->IsValid()) + return; + + uint32_t num_keys = as_dict->GetSize(); + if (num_keys == 0) + return; + + s->Indent("Args:\n"); + s->SetIndentLevel(s->GetIndentLevel() + 4); + + auto print_one_element = [&s](ConstString key, + StructuredData::Object *object) { + s->Indent(); + s->Printf("%s : %s\n", key.GetCString(), + object->GetStringValue().str().c_str()); + return true; + }; + + as_dict->ForEach(print_one_element); + + s->SetIndentLevel(s->GetIndentLevel() - 4); } static constexpr OptionEnumValueElement g_dynamic_value_types[] = { diff --git a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py new file mode 100644 index 0000000000000..e650778fe8e3b --- /dev/null +++ b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py @@ -0,0 +1,146 @@ +""" +Test stop hook functionality +""" + + + +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +class TestStopHooks(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + # If your test case doesn't stress debug info, the + # set this to true. That way it won't be run once for + # each debug info format. + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.build() + self.main_source_file = lldb.SBFileSpec("main.c") + full_path = os.path.join(self.getSourceDir(), "main.c") + self.main_start_line = line_number(full_path, "main()") + + def test_bad_handler(self): + """Test that we give a good error message when the handler is bad""" + self.script_setup() + result = lldb.SBCommandReturnObject() + + # First try the wrong number of args handler: + command = "target stop-hook add -P stop_hook.bad_handle_stop" + self.interp.HandleCommand(command, result) + self.assertFalse(result.Succeeded(), "Set the target stop hook") + self.assertIn("Wrong number of args", result.GetError(), "Got the wrong number of args error") + + # Next the no handler at all handler: + command = "target stop-hook add -P stop_hook.no_handle_stop" + + self.interp.HandleCommand(command, result) + self.assertFalse(result.Succeeded(), "Set the target stop hook") + self.assertIn('Class "stop_hook.no_handle_stop" is missing the required handle_stop callback', result.GetError(), "Got the right error") + + def test_stop_hooks_scripted(self): + """Test that a scripted stop hook works with no specifiers""" + self.stop_hooks_scripted(5) + + def test_stop_hooks_scripted_right_func(self): + """Test that a scripted stop hook fires when there is a function match""" + self.stop_hooks_scripted(5, "-n step_out_of_me") + + def test_stop_hooks_scripted_wrong_func(self): + """Test that a scripted stop hook doesn't fire when the function does not match""" + self.stop_hooks_scripted(0, "-n main") + + def test_stop_hooks_scripted_right_lines(self): + """Test that a scripted stop hook fires when there is a function match""" + self.stop_hooks_scripted(5, "-f main.c -l 1 -e %d"%(self.main_start_line)) + + def test_stop_hooks_scripted_wrong_lines(self): + """Test that a scripted stop hook doesn't fire when the function does not match""" + self.stop_hooks_scripted(0, "-f main.c -l %d -e 100"%(self.main_start_line)) + + def test_stop_hooks_scripted_auto_continue(self): + """Test that the --auto-continue flag works""" + self.do_test_auto_continue(False) + + def test_stop_hooks_scripted_return_false(self): + """Test that the returning False from a stop hook works""" + self.do_test_auto_continue(True) + + def do_test_auto_continue(self, return_true): + """Test that auto-continue works.""" + # We set auto-continue to 1 but the stop hook only applies to step_out_of_me, + # so we should end up stopped in main, having run the expression only once. + self.script_setup() + + result = lldb.SBCommandReturnObject() + + if return_true: + command = "target stop-hook add -P stop_hook.stop_handler -k increment -v 5 -k return_false -v 1 -n step_out_of_me" + else: + command = "target stop-hook add -G 1 -P stop_hook.stop_handler -k increment -v 5 -n step_out_of_me" + + self.interp.HandleCommand(command, result) + self.assertTrue(result.Succeeded, "Set the target stop hook") + + # First run to main. If we go straight to the first stop hook hit, + # run_to_source_breakpoint will fail because we aren't at original breakpoint + + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(self, + "Stop here first", self.main_source_file) + + # Now set the breakpoint on step_out_of_me, and make sure we run the + # expression, then continue back to main. + bkpt = target.BreakpointCreateBySourceRegex("Set a breakpoint here and step out", self.main_source_file) + self.assertTrue(bkpt.GetNumLocations() > 0, "Got breakpoints in step_out_of_me") + process.Continue() + + var = target.FindFirstGlobalVariable("g_var") + self.assertTrue(var.IsValid()) + self.assertEqual(var.GetValueAsUnsigned(), 5, "Updated g_var") + + func_name = process.GetSelectedThread().frames[0].GetFunctionName() + self.assertEqual("main", func_name, "Didn't stop at the expected function.") + + def script_setup(self): + self.interp = self.dbg.GetCommandInterpreter() + result = lldb.SBCommandReturnObject() + + # Bring in our script file: + script_name = os.path.join(self.getSourceDir(), "stop_hook.py") + command = "command script import " + script_name + self.interp.HandleCommand(command, result) + self.assertTrue(result.Succeeded(), "com scr imp failed: %s"%(result.GetError())) + + # set a breakpoint at the end of main to catch our auto-continue tests. + # Do it in the dummy target so it will get copied to our target even when + # we don't have a chance to stop. + dummy_target = self.dbg.GetDummyTarget() + dummy_target.BreakpointCreateBySourceRegex("return result", self.main_source_file) + + + def stop_hooks_scripted(self, g_var_value, specifier = None): + self.script_setup() + + result = lldb.SBCommandReturnObject() + + command = "target stop-hook add -P stop_hook.stop_handler -k increment -v 5 " + if specifier: + command += specifier + + self.interp.HandleCommand(command, result) + self.assertTrue(result.Succeeded, "Set the target stop hook") + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(self, + "Set a breakpoint here", self.main_source_file) + # At this point we've hit our stop hook so we should have run our expression, + # which increments g_var by the amount specified by the increment key's value. + while process.GetState() == lldb.eStateRunning: + continue + + var = target.FindFirstGlobalVariable("g_var") + self.assertTrue(var.IsValid()) + self.assertEqual(var.GetValueAsUnsigned(), g_var_value, "Updated g_var") diff --git a/lldb/test/API/commands/target/stop-hooks/TestStopHooks.py b/lldb/test/API/commands/target/stop-hooks/TestStopHooks.py index 64686afe627da..43447a845156d 100644 --- a/lldb/test/API/commands/target/stop-hooks/TestStopHooks.py +++ b/lldb/test/API/commands/target/stop-hooks/TestStopHooks.py @@ -1,5 +1,5 @@ """ -Test that stop hooks trigger on "step-out" +Test stop hook functionality """ @@ -18,10 +18,15 @@ class TestStopHooks(TestBase): # each debug info format. NO_DEBUG_INFO_TESTCASE = True - def test_stop_hooks_step_out(self): - """Test that stop hooks fire on step-out.""" + def setUp(self): + TestBase.setUp(self) self.build() self.main_source_file = lldb.SBFileSpec("main.c") + full_path = os.path.join(self.getSourceDir(), "main.c") + self.main_start_line = line_number(full_path, "main()") + + def test_stop_hooks_step_out(self): + """Test that stop hooks fire on step-out.""" self.step_out_test() def step_out_test(self): @@ -37,4 +42,3 @@ def step_out_test(self): self.assertTrue(var.IsValid()) self.assertEqual(var.GetValueAsUnsigned(), 1, "Updated g_var") - diff --git a/lldb/test/API/commands/target/stop-hooks/main.c b/lldb/test/API/commands/target/stop-hooks/main.c index d08ad14776b5a..16bfc0ce5db6b 100644 --- a/lldb/test/API/commands/target/stop-hooks/main.c +++ b/lldb/test/API/commands/target/stop-hooks/main.c @@ -10,5 +10,6 @@ int step_out_of_me() int main() { - return step_out_of_me(); + int result = step_out_of_me(); // Stop here first + return result; } diff --git a/lldb/test/API/commands/target/stop-hooks/stop_hook.py b/lldb/test/API/commands/target/stop-hooks/stop_hook.py new file mode 100644 index 0000000000000..1abc2bdeeb31b --- /dev/null +++ b/lldb/test/API/commands/target/stop-hooks/stop_hook.py @@ -0,0 +1,49 @@ +import lldb + +class stop_handler: + def __init__(self, target, extra_args, dict): + self.extra_args = extra_args + self.target = target + self.counter = 0 + ret_val = self.extra_args.GetValueForKey("return_false") + if ret_val: + self.ret_val = False + else: + self.ret_val = True + + def handle_stop(self, exe_ctx, stream): + self.counter += 1 + stream.Print("I have stopped %d times.\n"%(self.counter)) + increment = 1 + value = self.extra_args.GetValueForKey("increment") + if value: + incr_as_str = value.GetStringValue(100) + increment = int(incr_as_str) + else: + stream.Print("Could not find increment in extra_args\n") + frame = exe_ctx.GetFrame() + expression = "g_var += %d"%(increment) + expr_result = frame.EvaluateExpression(expression) + if not expr_result.GetError().Success(): + stream.Print("Error running expression: %s"%(expr_result.GetError().GetCString())) + value = exe_ctx.target.FindFirstGlobalVariable("g_var") + if not value.IsValid(): + stream.Print("Didn't get a valid value for g_var.") + else: + int_val = value.GetValueAsUnsigned() + stream.Print("Returning value: %d from handle_stop.\n"%(self.ret_val)) + return self.ret_val + +class bad_handle_stop: + def __init__(self, target, extra_args, dict): + print("I am okay") + + def handle_stop(self): + print("I am bad") + +class no_handle_stop: + def __init__(self, target, extra_args, dict): + print("I am okay") + + + diff --git a/lldb/test/Shell/Commands/Inputs/stop_hook.py b/lldb/test/Shell/Commands/Inputs/stop_hook.py new file mode 100644 index 0000000000000..e319ca9ec5bc8 --- /dev/null +++ b/lldb/test/Shell/Commands/Inputs/stop_hook.py @@ -0,0 +1,10 @@ +import lldb + +class stop_handler: + def __init__(self, target, extra_args, dict): + self.extra_args = extra_args + self.target = target + + def handle_stop(self, exe_ctx, stream): + stream.Print("I did indeed run\n") + return True diff --git a/lldb/test/Shell/Commands/command-stop-hook-output.test b/lldb/test/Shell/Commands/command-stop-hook-output.test new file mode 100644 index 0000000000000..7890bb3ca5e75 --- /dev/null +++ b/lldb/test/Shell/Commands/command-stop-hook-output.test @@ -0,0 +1,19 @@ +# REQUIRES: python +# RUN: %clang_host -g %S/Inputs/main.c -o %t +# RUN: %lldb %t -O 'command script import %S/Inputs/stop_hook.py' -s %s -o exit | FileCheck %s + +b main +# CHECK-LABEL: b main +# CHECK: Breakpoint 1: where = {{.*}}`main + +target stop-hook add -P stop_hook.stop_handler +# CHECK-LABEL: target stop-hook add -P stop_hook.stop_handler +# CHECK: Stop hook #1 added. + +run +# CHECK-LABEL: run +# CHECK: I did indeed run +# CHECK: Process {{.*}} stopped +# CHECK: stop reason = breakpoint 1 +# CHECK: frame #0: {{.*}}`main at main.c + diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index f661835d191b1..58ddf0c40a267 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -254,3 +254,17 @@ LLDBSWIGPython_GetDynamicSetting(void *module, const char *setting, const lldb::TargetSP &target_sp) { return nullptr; } + +extern "C" void *LLDBSwigPythonCreateScriptedStopHook( + lldb::TargetSP target_sp, const char *python_class_name, + const char *session_dictionary_name, + lldb_private::StructuredDataImpl *args_impl, Status &error) { + return nullptr; +} + +extern "C" bool +LLDBSwigPythonStopHookCallHandleStop(void *implementor, + lldb::ExecutionContextRefSP exc_ctx_sp, + lldb::StreamSP stream) { + return false; +} From 6c91e623e53703560e781b172e9160cae2cf8d21 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 29 Sep 2020 11:58:37 -0700 Subject: [PATCH 068/544] [CodeGen] emit CG profile for COFF object file Differential Revision: https://reviews.llvm.org/D87811 --- .../CodeGen/TargetLoweringObjectFileImpl.h | 1 + .../CodeGen/TargetLoweringObjectFileImpl.cpp | 65 ++++++++++++++++--- llvm/test/MC/COFF/cgprofile.ll | 51 +++++++++++++++ 3 files changed, 107 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/COFF/cgprofile.ll diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 57cead3dde6c9..625137a1f998c 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -143,6 +143,7 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile { class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { mutable unsigned NextUniqueID = 0; + const TargetMachine *TM = nullptr; public: ~TargetLoweringObjectFileCOFF() override = default; diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a7f560f3f2c24..676a465c49e2c 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1599,18 +1599,62 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, StringRef Section; GetObjCImageInfo(M, Version, Flags, Section); - if (Section.empty()) - return; + if (!Section.empty()) { + auto &C = getContext(); + auto *S = C.getCOFFSection(Section, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + Streamer.SwitchSection(S); + Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.emitInt32(Version); + Streamer.emitInt32(Flags); + Streamer.AddBlankLine(); + } auto &C = getContext(); - auto *S = C.getCOFFSection( - Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); - Streamer.SwitchSection(S); - Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); - Streamer.emitInt32(Version); - Streamer.emitInt32(Flags); - Streamer.AddBlankLine(); + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + MDNode *CFGProfile = nullptr; + + for (const auto &MFE : ModuleFlags) { + StringRef Key = MFE.Key->getString(); + if (Key == "CG Profile") { + CFGProfile = cast(MFE.Val); + break; + } + } + + if (!CFGProfile) + return; + + auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * { + if (!MDO) + return nullptr; + auto V = cast(MDO); + const Function *F = cast(V->getValue()); + if (F->hasDLLImportStorageClass()) + return nullptr; + return TM->getSymbol(F); + }; + + for (const auto &Edge : CFGProfile->operands()) { + MDNode *E = cast(Edge); + const MCSymbol *From = GetSym(E->getOperand(0)); + const MCSymbol *To = GetSym(E->getOperand(1)); + // Skip null functions. This can happen if functions are dead stripped after + // the CGProfile pass has been run. + if (!From || !To) + continue; + uint64_t Count = cast(E->getOperand(2)) + ->getValue() + ->getUniqueInteger() + .getZExtValue(); + Streamer.emitCGProfileEntry( + MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C), + MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count); + } } void TargetLoweringObjectFileCOFF::emitLinkerDirectives( @@ -1675,6 +1719,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); + this->TM = &TM; const Triple &T = TM.getTargetTriple(); if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = diff --git a/llvm/test/MC/COFF/cgprofile.ll b/llvm/test/MC/COFF/cgprofile.ll new file mode 100644 index 0000000000000..0156aedb12219 --- /dev/null +++ b/llvm/test/MC/COFF/cgprofile.ll @@ -0,0 +1,51 @@ +; RUN: llc -filetype=asm %s -o - -mtriple x86_64-pc-windows-msvc | FileCheck %s +; RUN: llc -filetype=obj %s -o %t -mtriple x86_64-pc-windows-msvc +; RUN: llvm-readobj --cg-profile %t | FileCheck %s --check-prefix=OBJ + +declare void @b() + +define void @a() { + call void @b() + ret void +} + +define void @freq(i1 %cond) { + br i1 %cond, label %A, label %B +A: + call void @a(); + ret void +B: + call void @b(); + ret void +} + +!llvm.module.flags = !{!0} + +!0 = !{i32 5, !"CG Profile", !1} +!1 = !{!2, !3, !4, !5} +!2 = !{void ()* @a, void ()* @b, i64 32} +!3 = !{void (i1)* @freq, void ()* @a, i64 11} +!4 = !{void (i1)* @freq, void ()* @b, i64 20} +!5 = !{void (i1)* @freq, null, i64 20} + +; CHECK: .cg_profile a, b, 32 +; CHECK: .cg_profile freq, a, 11 +; CHECK: .cg_profile freq, b, 20 + +; OBJ: CGProfile [ +; OBJ: CGProfileEntry { +; OBJ: From: a +; OBJ: To: b +; OBJ: Weight: 32 +; OBJ: } +; OBJ: CGProfileEntry { +; OBJ: From: freq +; OBJ: To: a +; OBJ: Weight: 11 +; OBJ: } +; OBJ: CGProfileEntry { +; OBJ: From: freq +; OBJ: To: b +; OBJ: Weight: 20 +; OBJ: } +; OBJ:] From 15fbae8ac303d8601ea95418d4818cb50d0765e1 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Sep 2020 15:03:29 -0400 Subject: [PATCH 069/544] Use "default member initializer" instead of "in-class initializer" for diagnostics. This changes some diagnostics to use terminology from the standard rather than invented terminology, which improves consistency with other diagnostics as well. There are no functional changes intended other than wording and naming. --- clang/include/clang/Basic/DiagnosticParseKinds.td | 14 +++++++------- clang/include/clang/Basic/DiagnosticSemaKinds.td | 8 ++++---- clang/lib/Sema/SemaDeclCXX.cpp | 8 +++++--- clang/lib/Sema/SemaTemplateInstantiate.cpp | 7 ++++--- clang/test/Parser/MicrosoftExtensions.cpp | 2 +- clang/test/Parser/cxx-class.cpp | 12 ++++++------ clang/test/SemaCXX/PR9572.cpp | 2 +- clang/test/SemaCXX/class.cpp | 2 +- clang/test/SemaCXX/cxx98-compat.cpp | 2 +- clang/test/SemaCXX/member-init.cpp | 8 ++++---- 10 files changed, 34 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 1ac1e9d10a7a1..da4e1725269ff 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -709,7 +709,7 @@ def err_ms_property_expected_accessor_name : Error< def err_ms_property_expected_comma_or_rparen : Error< "expected ',' or ')' at end of property accessor list">; def err_ms_property_initializer : Error< - "property declaration cannot have an in-class initializer">; + "property declaration cannot have a default member initializer">; def warn_cxx20_compat_explicit_bool : Warning< "this expression will be parsed as explicit(bool) in C++20">, @@ -859,13 +859,13 @@ def warn_cxx98_compat_defaulted_deleted_function : Warning< "%select{defaulted|deleted}0 function definitions are incompatible with C++98">, InGroup, DefaultIgnore; -// C++11 in-class member initialization +// C++11 default member initialization def ext_nonstatic_member_init : ExtWarn< - "in-class initialization of non-static data member is a C++11 extension">, - InGroup; + "default member initializer for non-static data member is a C++11 " + "extension">, InGroup; def warn_cxx98_compat_nonstatic_member_init : Warning< - "in-class initialization of non-static data members is incompatible with C++98">, - InGroup, DefaultIgnore; + "default member initializer for non-static data members is incompatible with " + "C++98">, InGroup, DefaultIgnore; def ext_bitfield_member_init: ExtWarn< "default member initializer for bit-field is a C++20 extension">, InGroup; @@ -873,7 +873,7 @@ def warn_cxx17_compat_bitfield_member_init: Warning< "default member initializer for bit-field is incompatible with " "C++ standards before C++20">, InGroup, DefaultIgnore; def err_incomplete_array_member_init: Error< - "array bound cannot be deduced from an in-class initializer">; + "array bound cannot be deduced from a default member initializer">; // C++11 alias-declaration def ext_alias_declaration : ExtWarn< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8f6c7b9400fae..ed11e0d1ce3c2 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1842,7 +1842,7 @@ def note_nontrivial_no_copy : Note< def note_nontrivial_user_provided : Note< "because %select{base class of |field of |}0type %1 has a user-provided " "%sub{select_special_member_kind}2">; -def note_nontrivial_in_class_init : Note< +def note_nontrivial_default_member_init : Note< "because field %0 has an initializer">; def note_nontrivial_param_type : Note< "because its parameter is %diff{of type $, not $|of the wrong type}2,3">; @@ -8521,12 +8521,12 @@ def err_in_class_initializer_literal_type : Error< "'constexpr' specifier">; def err_in_class_initializer_non_constant : Error< "in-class initializer for static data member is not a constant expression">; -def err_in_class_initializer_not_yet_parsed : Error< +def err_default_member_initializer_not_yet_parsed : Error< "default member initializer for %1 needed within definition of enclosing " "class %0 outside of member functions">; -def note_in_class_initializer_not_yet_parsed : Note< +def note_default_member_initializer_not_yet_parsed : Note< "default member initializer declared here">; -def err_in_class_initializer_cycle +def err_default_member_initializer_cycle : Error<"default member initializer for %0 uses itself">; def ext_in_class_initializer_non_constant : Extension< diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 6558a4f6d8b20..2d2b80573a696 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -9403,7 +9403,8 @@ static bool checkTrivialClassMembers(Sema &S, CXXRecordDecl *RD, // brace-or-equal-initializer if (CSM == Sema::CXXDefaultConstructor && FI->hasInClassInitializer()) { if (Diagnose) - S.Diag(FI->getLocation(), diag::note_nontrivial_in_class_init) << FI; + S.Diag(FI->getLocation(), diag::note_nontrivial_default_member_init) + << FI; return false; } @@ -15080,9 +15081,10 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { // constructor before the initializer is lexically complete will ultimately // come here at which point we can diagnose it. RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); - Diag(Loc, diag::err_in_class_initializer_not_yet_parsed) + Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) << OutermostClass << Field; - Diag(Field->getEndLoc(), diag::note_in_class_initializer_not_yet_parsed); + Diag(Field->getEndLoc(), + diag::note_default_member_initializer_not_yet_parsed); // Recover by marking the field invalid, unless we're in a SFINAE context. if (!isSFINAEContext()) Field->setInvalidDecl(); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 11e03c517d015..555d8e036a64c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2972,9 +2972,10 @@ bool Sema::InstantiateInClassInitializer( RecordDecl *PatternRD = Pattern->getParent(); RecordDecl *OutermostClass = PatternRD->getOuterLexicalRecordContext(); Diag(PointOfInstantiation, - diag::err_in_class_initializer_not_yet_parsed) + diag::err_default_member_initializer_not_yet_parsed) << OutermostClass << Pattern; - Diag(Pattern->getEndLoc(), diag::note_in_class_initializer_not_yet_parsed); + Diag(Pattern->getEndLoc(), + diag::note_default_member_initializer_not_yet_parsed); Instantiation->setInvalidDecl(); return true; } @@ -2984,7 +2985,7 @@ bool Sema::InstantiateInClassInitializer( return true; if (Inst.isAlreadyInstantiating()) { // Error out if we hit an instantiation cycle for this initializer. - Diag(PointOfInstantiation, diag::err_in_class_initializer_cycle) + Diag(PointOfInstantiation, diag::err_default_member_initializer_cycle) << Instantiation; return true; } diff --git a/clang/test/Parser/MicrosoftExtensions.cpp b/clang/test/Parser/MicrosoftExtensions.cpp index ddbe5aaef7790..52f40677a1350 100644 --- a/clang/test/Parser/MicrosoftExtensions.cpp +++ b/clang/test/Parser/MicrosoftExtensions.cpp @@ -349,7 +349,7 @@ struct StructWithProperty { __declspec(property(get=GetV,)) int V10; // expected-error {{expected 'get' or 'put' in property declaration}} __declspec(property(get=GetV,put=SetV)) int V11; // no-warning __declspec(property(get=GetV,put=SetV,get=GetV)) int V12; // expected-error {{property declaration specifies 'get' accessor twice}} - __declspec(property(get=GetV)) int V13 = 3; // expected-error {{property declaration cannot have an in-class initializer}} + __declspec(property(get=GetV)) int V13 = 3; // expected-error {{property declaration cannot have a default member initializer}} int GetV() { return 123; } void SetV(int v) {} diff --git a/clang/test/Parser/cxx-class.cpp b/clang/test/Parser/cxx-class.cpp index 576c6d7e8b976..efd1a6dc03e77 100644 --- a/clang/test/Parser/cxx-class.cpp +++ b/clang/test/Parser/cxx-class.cpp @@ -229,34 +229,34 @@ namespace BadFriend { class PR20760_a { int a = ); // expected-error {{expected expression}} #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif int b = }; // expected-error {{expected expression}} #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif int c = ]; // expected-error {{expected expression}} #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif }; class PR20760_b { int d = d); // expected-error {{expected ';'}} #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif int e = d]; // expected-error {{expected ';'}} #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif int f = d // expected-error {{expected ';'}} #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif }; diff --git a/clang/test/SemaCXX/PR9572.cpp b/clang/test/SemaCXX/PR9572.cpp index cbfa6c76828de..d0ba32eb1ab10 100644 --- a/clang/test/SemaCXX/PR9572.cpp +++ b/clang/test/SemaCXX/PR9572.cpp @@ -21,7 +21,7 @@ struct Foo : public Base { const int kBlah = 3; #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif Foo(); diff --git a/clang/test/SemaCXX/class.cpp b/clang/test/SemaCXX/class.cpp index 0ed6a77b9b122..84334f094381d 100644 --- a/clang/test/SemaCXX/class.cpp +++ b/clang/test/SemaCXX/class.cpp @@ -44,7 +44,7 @@ class C { int i = 0; #if __cplusplus <= 199711L - // expected-warning@-2 {{in-class initialization of non-static data member is a C++11 extension}} + // expected-warning@-2 {{default member initializer for non-static data member is a C++11 extension}} #endif static int si = 0; // expected-error {{non-const static data member must be initialized out of line}} static const NestedC ci = 0; // expected-error {{static data member of type 'const C::NestedC' must be initialized out of line}} diff --git a/clang/test/SemaCXX/cxx98-compat.cpp b/clang/test/SemaCXX/cxx98-compat.cpp index e221259859bc5..581b620c70727 100644 --- a/clang/test/SemaCXX/cxx98-compat.cpp +++ b/clang/test/SemaCXX/cxx98-compat.cpp @@ -122,7 +122,7 @@ void RangeFor() { } struct InClassInit { - int n = 0; // expected-warning {{in-class initialization of non-static data members is incompatible with C++98}} + int n = 0; // expected-warning {{default member initializer for non-static data members is incompatible with C++98}} }; struct OverrideControlBase { diff --git a/clang/test/SemaCXX/member-init.cpp b/clang/test/SemaCXX/member-init.cpp index 33f16940bbf87..24291064296f9 100644 --- a/clang/test/SemaCXX/member-init.cpp +++ b/clang/test/SemaCXX/member-init.cpp @@ -21,20 +21,20 @@ struct Recurse { // expected-error {{initializer for 'n' needed}} }; struct UnknownBound { - int as[] = { 1, 2, 3 }; // expected-error {{array bound cannot be deduced from an in-class initializer}} + int as[] = { 1, 2, 3 }; // expected-error {{array bound cannot be deduced from a default member initializer}} int bs[4] = { 4, 5, 6, 7 }; - int cs[] = { 8, 9, 10 }; // expected-error {{array bound cannot be deduced from an in-class initializer}} + int cs[] = { 8, 9, 10 }; // expected-error {{array bound cannot be deduced from a default member initializer}} }; template struct T { static const int B; }; template<> struct T<2> { template using B = int; }; const int C = 0, D = 0; struct S { - int as[] = { decltype(x)::B(0) }; // expected-error {{array bound cannot be deduced from an in-class initializer}} + int as[] = { decltype(x)::B(0) }; // expected-error {{array bound cannot be deduced from a default member initializer}} T x; // test that we handle invalid array bound deductions without crashing when the declarator name is itself invalid operator int[](){}; // expected-error {{'operator int' cannot be the name of a variable or data member}} \ - // expected-error {{array bound cannot be deduced from an in-class initializer}} + // expected-error {{array bound cannot be deduced from a default member initializer}} }; struct ThrowCtor { ThrowCtor(int) noexcept(false); }; From ac8a51c701ebd332a44944f8ff8545c21bc438ce Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 29 Sep 2020 20:48:32 +0200 Subject: [PATCH 070/544] [ValueTracking] Early exit known non zero for phis After D88276 we no longer expect computeKnownBits() to prove non-zeroness for cases where isKnownNonZero() can't, so don't fall through to it. --- llvm/lib/Analysis/ValueTracking.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 11377c467bee7..11eb5f303c550 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2564,14 +2564,12 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // Check if all incoming values are non-zero using recursion. Query RecQ = Q; unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); - bool AllNonZero = llvm::all_of(PN->operands(), [&](const Use &U) { + return llvm::all_of(PN->operands(), [&](const Use &U) { if (U.get() == PN) return true; RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); }); - if (AllNonZero) - return true; } // ExtractElement else if (const auto *EEI = dyn_cast(V)) { From a89fc12653c520a5a70249e07c0a394584f4abbe Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Tue, 29 Sep 2020 11:11:27 -0700 Subject: [PATCH 071/544] [mlir] Support return and call ops in bare-ptr calling convention This patch adds support for the 'return' and 'call' ops to the bare-ptr calling convention. These changes also align the bare-ptr calling convention code with the latest changes in the default calling convention and reduce the amount of customization code needed. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D87724 --- .../StandardToLLVM/ConvertStandardToLLVM.h | 21 +- .../StandardToLLVM/StandardToLLVM.cpp | 266 ++++++++++++------ .../convert-static-memref-ops.mlir | 48 +++- 3 files changed, 232 insertions(+), 103 deletions(-) diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index ab047a08f404c..d98a0ff6efb36 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -27,6 +27,7 @@ class Type; namespace mlir { +class BaseMemRefType; class ComplexType; class LLVMTypeConverter; class UnrankedMemRefType; @@ -74,15 +75,28 @@ class LLVMTypeConverter : public TypeConverter { SignatureConversion &result); /// Convert a non-empty list of types to be returned from a function into a - /// supported LLVM IR type. In particular, if more than one values is + /// supported LLVM IR type. In particular, if more than one value is /// returned, create an LLVM IR structure type with elements that correspond /// to each of the MLIR types converted with `convertType`. Type packFunctionResults(ArrayRef types); + /// Convert a type in the context of the default or bare pointer calling + /// convention. Calling convention sensitive types, such as MemRefType and + /// UnrankedMemRefType, are converted following the specific rules for the + /// calling convention. Calling convention independent types are converted + /// following the default LLVM type conversions. + Type convertCallingConventionType(Type type); + + /// Promote the bare pointers in 'values' that resulted from memrefs to + /// descriptors. 'stdTypes' holds the types of 'values' before the conversion + /// to the LLVM-IR dialect (i.e., MemRefType, or any other Standard type). + void promoteBarePtrsToDescriptors(ConversionPatternRewriter &rewriter, + Location loc, ArrayRef stdTypes, + SmallVectorImpl &values); + /// Returns the MLIR context. MLIRContext &getContext(); - /// Returns the LLVM dialect. LLVM::LLVMDialect *getDialect() { return llvmDialect; } @@ -179,6 +193,9 @@ class LLVMTypeConverter : public TypeConverter { // runtime rank and a pointer to the static ranked memref desc Type convertUnrankedMemRefType(UnrankedMemRefType type); + /// Convert a memref type to a bare pointer to the memref element type. + Type convertMemRefToBarePtr(BaseMemRefType type); + // Convert a 1D vector type into an LLVM vector type. Type convertVectorType(VectorType type); diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 186c8ec48fa54..c77c0b529cafd 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -80,37 +80,12 @@ LogicalResult mlir::structFuncArgTypeConverter(LLVMTypeConverter &converter, return success(); } -/// Convert a MemRef type to a bare pointer to the MemRef element type. -static Type convertMemRefTypeToBarePtr(LLVMTypeConverter &converter, - MemRefType type) { - int64_t offset; - SmallVector strides; - if (failed(getStridesAndOffset(type, strides, offset))) - return {}; - - LLVM::LLVMType elementType = - unwrap(converter.convertType(type.getElementType())); - if (!elementType) - return {}; - return elementType.getPointerTo(type.getMemorySpace()); -} - /// Callback to convert function argument types. It converts MemRef function /// arguments to bare pointers to the MemRef element type. LogicalResult mlir::barePtrFuncArgTypeConverter(LLVMTypeConverter &converter, Type type, SmallVectorImpl &result) { - // TODO: Add support for unranked memref. - if (auto memrefTy = type.dyn_cast()) { - auto llvmTy = convertMemRefTypeToBarePtr(converter, memrefTy); - if (!llvmTy) - return failure(); - - result.push_back(llvmTy); - return success(); - } - - auto llvmTy = converter.convertType(type); + auto llvmTy = converter.convertCallingConventionType(type); if (!llvmTy) return failure(); @@ -272,14 +247,14 @@ SmallVector LLVMTypeConverter::convertUnrankedMemRefSignature() { // Function has one VoidType result. If MLIR Function has more than one result, // they are into an LLVM StructType in their order of appearance. LLVM::LLVMType LLVMTypeConverter::convertFunctionSignature( - FunctionType type, bool isVariadic, + FunctionType funcTy, bool isVariadic, LLVMTypeConverter::SignatureConversion &result) { // Select the argument converter depending on the calling convetion. auto funcArgConverter = options.useBarePtrCallConv ? barePtrFuncArgTypeConverter : structFuncArgTypeConverter; // Convert argument types one by one and check for errors. - for (auto &en : llvm::enumerate(type.getInputs())) { + for (auto &en : llvm::enumerate(funcTy.getInputs())) { Type type = en.value(); SmallVector converted; if (failed(funcArgConverter(*this, type, converted))) @@ -296,9 +271,9 @@ LLVM::LLVMType LLVMTypeConverter::convertFunctionSignature( // if it returns on element, convert it, otherwise pack the result types into // a struct. LLVM::LLVMType resultType = - type.getNumResults() == 0 + funcTy.getNumResults() == 0 ? LLVM::LLVMType::getVoidTy(&getContext()) - : unwrap(packFunctionResults(type.getResults())); + : unwrap(packFunctionResults(funcTy.getResults())); if (!resultType) return {}; return LLVM::LLVMType::getFunctionTy(resultType, argTypes, isVariadic); @@ -394,6 +369,36 @@ Type LLVMTypeConverter::convertUnrankedMemRefType(UnrankedMemRefType type) { return LLVM::LLVMType::getStructTy(rankTy, ptrTy); } +/// Convert a memref type to a bare pointer to the memref element type. +Type LLVMTypeConverter::convertMemRefToBarePtr(BaseMemRefType type) { + if (type.isa()) + // Unranked memref is not supported in the bare pointer calling convention. + return {}; + + // Check that the memref has static shape, strides and offset. Otherwise, it + // cannot be lowered to a bare pointer. + auto memrefTy = type.cast(); + if (!memrefTy.hasStaticShape()) + return {}; + + int64_t offset = 0; + SmallVector strides; + if (failed(getStridesAndOffset(memrefTy, strides, offset))) + return {}; + + for (int64_t stride : strides) + if (ShapedType::isDynamicStrideOrOffset(stride)) + return {}; + + if (ShapedType::isDynamicStrideOrOffset(offset)) + return {}; + + LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); + if (!elementType) + return {}; + return elementType.getPointerTo(type.getMemorySpace()); +} + // Convert an n-D vector type to an LLVM vector type via (n-1)-D array type when // n > 1. // For example, `vector<4 x f32>` converts to `!llvm.type<"<4 x float>">` and @@ -410,6 +415,37 @@ Type LLVMTypeConverter::convertVectorType(VectorType type) { return vectorType; } +/// Convert a type in the context of the default or bare pointer calling +/// convention. Calling convention sensitive types, such as MemRefType and +/// UnrankedMemRefType, are converted following the specific rules for the +/// calling convention. Calling convention independent types are converted +/// following the default LLVM type conversions. +Type LLVMTypeConverter::convertCallingConventionType(Type type) { + if (options.useBarePtrCallConv) + if (auto memrefTy = type.dyn_cast()) + return convertMemRefToBarePtr(memrefTy); + + return convertType(type); +} + +/// Promote the bare pointers in 'values' that resulted from memrefs to +/// descriptors. 'stdTypes' holds they types of 'values' before the conversion +/// to the LLVM-IR dialect (i.e., MemRefType, or any other Standard type). +void LLVMTypeConverter::promoteBarePtrsToDescriptors( + ConversionPatternRewriter &rewriter, Location loc, ArrayRef stdTypes, + SmallVectorImpl &values) { + assert(stdTypes.size() == values.size() && + "The number of types and values doesn't match"); + for (unsigned i = 0, end = values.size(); i < end; ++i) { + Type stdTy = stdTypes[i]; + if (auto memrefTy = stdTy.dyn_cast()) + values[i] = MemRefDescriptor::fromStaticShape(rewriter, loc, *this, + memrefTy, values[i]); + else + llvm_unreachable("Unranked memrefs are not supported"); + } +} + ConvertToLLVMPattern::ConvertToLLVMPattern(StringRef rootOpName, MLIRContext *context, LLVMTypeConverter &typeConverter, @@ -1088,18 +1124,6 @@ namespace { struct FuncOpConversionBase : public ConvertOpToLLVMPattern { protected: using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - using UnsignedTypePair = std::pair; - - // Gather the positions and types of memref-typed arguments in a given - // FunctionType. - void getMemRefArgIndicesAndTypes( - FunctionType type, SmallVectorImpl &argsInfo) const { - argsInfo.reserve(type.getNumInputs()); - for (auto en : llvm::enumerate(type.getInputs())) { - if (en.value().isa()) - argsInfo.push_back({en.index(), en.value()}); - } - } // Convert input FuncOp to LLVMFuncOp by using the LLVMTypeConverter provided // to this legalization pattern. @@ -1192,11 +1216,10 @@ struct BarePtrFuncOpConversion : public FuncOpConversionBase { ConversionPatternRewriter &rewriter) const override { auto funcOp = cast(op); - // Store the positions and type of memref-typed arguments so that we can - // promote them to MemRef descriptor structs at the beginning of the - // function. - SmallVector promotedArgsInfo; - getMemRefArgIndicesAndTypes(funcOp.getType(), promotedArgsInfo); + // Store the type of memref-typed arguments before the conversion so that we + // can promote them to MemRef descriptor at the beginning of the function. + SmallVector oldArgTypes = + llvm::to_vector<8>(funcOp.getType().getInputs()); auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter); if (!newFuncOp) @@ -1206,27 +1229,42 @@ struct BarePtrFuncOpConversion : public FuncOpConversionBase { return success(); } - // Promote bare pointers from MemRef arguments to a MemRef descriptor struct - // at the beginning of the function so that all the MemRefs in the function - // have a uniform representation. - Block *firstBlock = &newFuncOp.getBody().front(); - rewriter.setInsertionPoint(firstBlock, firstBlock->begin()); - auto funcLoc = funcOp.getLoc(); - for (const auto &argInfo : promotedArgsInfo) { - // TODO: Add support for unranked MemRefs. - if (auto memrefType = argInfo.second.dyn_cast()) { - // Replace argument with a placeholder (undef), promote argument to a - // MemRef descriptor and replace placeholder with the last instruction - // of the MemRef descriptor. The placeholder is needed to avoid - // replacing argument uses in the MemRef descriptor instructions. - BlockArgument arg = firstBlock->getArgument(argInfo.first); - Value placeHolder = - rewriter.create(funcLoc, arg.getType()); - rewriter.replaceUsesOfBlockArgument(arg, placeHolder); - auto desc = MemRefDescriptor::fromStaticShape( - rewriter, funcLoc, typeConverter, memrefType, arg); - rewriter.replaceOp(placeHolder.getDefiningOp(), {desc}); - } + // Promote bare pointers from memref arguments to memref descriptors at the + // beginning of the function so that all the memrefs in the function have a + // uniform representation. + Block *entryBlock = &newFuncOp.getBody().front(); + auto blockArgs = entryBlock->getArguments(); + assert(blockArgs.size() == oldArgTypes.size() && + "The number of arguments and types doesn't match"); + + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(entryBlock); + for (auto it : llvm::zip(blockArgs, oldArgTypes)) { + BlockArgument arg = std::get<0>(it); + Type argTy = std::get<1>(it); + + // Unranked memrefs are not supported in the bare pointer calling + // convention. We should have bailed out before in the presence of + // unranked memrefs. + assert(!argTy.isa() && + "Unranked memref is not supported"); + auto memrefTy = argTy.dyn_cast(); + if (!memrefTy) + continue; + + // Replace barePtr with a placeholder (undef), promote barePtr to a ranked + // or unranked memref descriptor and replace placeholder with the last + // instruction of the memref descriptor. + // TODO: The placeholder is needed to avoid replacing barePtr uses in the + // MemRef descriptor instructions. We may want to have a utility in the + // rewriter to properly handle this use case. + Location loc = op->getLoc(); + auto placeholder = rewriter.create(loc, memrefTy); + rewriter.replaceUsesOfBlockArgument(arg, placeholder); + + Value desc = MemRefDescriptor::fromStaticShape( + rewriter, loc, typeConverter, memrefTy, arg); + rewriter.replaceOp(placeholder, {desc}); } rewriter.eraseOp(op); @@ -2138,12 +2176,22 @@ struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern { rewriter.getI64ArrayAttr(i))); } } - if (failed(copyUnrankedDescriptors( - rewriter, op->getLoc(), this->typeConverter, op->getResultTypes(), - results, /*toDynamic=*/false))) + + if (this->typeConverter.getOptions().useBarePtrCallConv) { + // For the bare-ptr calling convention, promote memref results to + // descriptors. + assert(results.size() == resultTypes.size() && + "The number of arguments and types doesn't match"); + this->typeConverter.promoteBarePtrsToDescriptors(rewriter, op->getLoc(), + resultTypes, results); + } else if (failed(copyUnrankedDescriptors(rewriter, op->getLoc(), + this->typeConverter, resultTypes, + results, + /*toDynamic=*/false))) { return failure(); - rewriter.replaceOp(op, results); + } + rewriter.replaceOp(op, results); return success(); } }; @@ -2706,11 +2754,32 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern { LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); unsigned numArguments = op->getNumOperands(); - auto updatedOperands = llvm::to_vector<4>(operands); - copyUnrankedDescriptors(rewriter, op->getLoc(), typeConverter, - op->getOperands().getTypes(), updatedOperands, - /*toDynamic=*/true); + SmallVector updatedOperands; + + if (typeConverter.getOptions().useBarePtrCallConv) { + // For the bare-ptr calling convention, extract the aligned pointer to + // be returned from the memref descriptor. + for (auto it : llvm::zip(op->getOperands(), operands)) { + Type oldTy = std::get<0>(it).getType(); + Value newOperand = std::get<1>(it); + if (oldTy.isa()) { + MemRefDescriptor memrefDesc(newOperand); + newOperand = memrefDesc.alignedPtr(rewriter, loc); + } else if (oldTy.isa()) { + // Unranked memref is not supported in the bare pointer calling + // convention. + return failure(); + } + updatedOperands.push_back(newOperand); + } + } else { + updatedOperands = llvm::to_vector<4>(operands); + copyUnrankedDescriptors(rewriter, loc, typeConverter, + op->getOperands().getTypes(), updatedOperands, + /*toDynamic=*/true); + } // If ReturnOp has 0 or 1 operand, create it and return immediately. if (numArguments == 0) { @@ -2729,10 +2798,10 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern { auto packedType = typeConverter.packFunctionResults( llvm::to_vector<4>(op->getOperandTypes())); - Value packed = rewriter.create(op->getLoc(), packedType); + Value packed = rewriter.create(loc, packedType); for (unsigned i = 0; i < numArguments; ++i) { packed = rewriter.create( - op->getLoc(), packedType, packed, updatedOperands[i], + loc, packedType, packed, updatedOperands[i], rewriter.getI64ArrayAttr(i)); } rewriter.replaceOpWithNewOp(op, TypeRange(), packed, @@ -3380,17 +3449,21 @@ void mlir::populateStdToLLVMConversionPatterns( populateStdToLLVMMemoryConversionPatterns(converter, patterns); } -// Create an LLVM IR structure type if there is more than one result. +/// Convert a non-empty list of types to be returned from a function into a +/// supported LLVM IR type. In particular, if more than one value is returned, +/// create an LLVM IR structure type with elements that correspond to each of +/// the MLIR types converted with `convertType`. Type LLVMTypeConverter::packFunctionResults(ArrayRef types) { assert(!types.empty() && "expected non-empty list of type"); if (types.size() == 1) - return convertType(types.front()); + return convertCallingConventionType(types.front()); SmallVector resultTypes; resultTypes.reserve(types.size()); for (auto t : types) { - auto converted = convertType(t).dyn_cast_or_null(); + auto converted = + convertCallingConventionType(t).dyn_cast_or_null(); if (!converted) return {}; resultTypes.push_back(converted); @@ -3426,16 +3499,27 @@ SmallVector LLVMTypeConverter::promoteOperands(Location loc, auto operand = std::get<0>(it); auto llvmOperand = std::get<1>(it); - if (operand.getType().isa()) { - UnrankedMemRefDescriptor::unpack(builder, loc, llvmOperand, - promotedOperands); - continue; - } - if (auto memrefType = operand.getType().dyn_cast()) { - MemRefDescriptor::unpack(builder, loc, llvmOperand, - operand.getType().cast(), - promotedOperands); - continue; + if (options.useBarePtrCallConv) { + // For the bare-ptr calling convention, we only have to extract the + // aligned pointer of a memref. + if (auto memrefType = operand.getType().dyn_cast()) { + MemRefDescriptor desc(llvmOperand); + llvmOperand = desc.alignedPtr(builder, loc); + } else if (operand.getType().isa()) { + llvm_unreachable("Unranked memrefs are not supported"); + } + } else { + if (operand.getType().isa()) { + UnrankedMemRefDescriptor::unpack(builder, loc, llvmOperand, + promotedOperands); + continue; + } + if (auto memrefType = operand.getType().dyn_cast()) { + MemRefDescriptor::unpack(builder, loc, llvmOperand, + operand.getType().cast(), + promotedOperands); + continue; + } } promotedOperands.push_back(llvmOperand); diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir index 5cccca3795b3b..5dd36ba6d2acc 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir @@ -14,13 +14,13 @@ func @check_noalias(%static : memref<2xf32> {llvm.noalias = true}, %other : memr // CHECK-COUNT-5: !llvm.i64 // CHECK-SAME: -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-LABEL: func @check_static_return -// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { +// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr) -> !llvm.ptr { func @check_static_return(%static : memref<32x18xf32>) -> memref<32x18xf32> { // CHECK: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR-NEXT: %[[base:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[base0:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base0]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 @@ -31,7 +31,8 @@ func @check_static_return(%static : memref<32x18xf32>) -> memref<32x18xf32> { // BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR-NEXT: llvm.return %[[ins4]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[base1:.*]] = llvm.extractvalue %[[ins4]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: llvm.return %[[base1]] : !llvm.ptr return %static : memref<32x18xf32> } @@ -42,13 +43,13 @@ func @check_static_return(%static : memref<32x18xf32>) -> memref<32x18xf32> { // CHECK-COUNT-5: !llvm.i64 // CHECK-SAME: -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-LABEL: func @check_static_return_with_offset -// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { +// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr) -> !llvm.ptr { func @check_static_return_with_offset(%static : memref<32x18xf32, offset:7, strides:[22,1]>) -> memref<32x18xf32, offset:7, strides:[22,1]> { // CHECK: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR-NEXT: %[[base:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[base0:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base0]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(7 : index) : !llvm.i64 // BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 @@ -59,14 +60,15 @@ func @check_static_return_with_offset(%static : memref<32x18xf32, offset:7, stri // BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR-NEXT: llvm.return %[[ins4]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[base1:.*]] = llvm.extractvalue %[[ins4]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: llvm.return %[[base1]] : !llvm.ptr return %static : memref<32x18xf32, offset:7, strides:[22,1]> } // ----- // CHECK-LABEL: func @zero_d_alloc() -> !llvm.struct<(ptr, ptr, i64)> { -// BAREPTR-LABEL: func @zero_d_alloc() -> !llvm.struct<(ptr, ptr, i64)> { +// BAREPTR-LABEL: func @zero_d_alloc() -> !llvm.ptr { func @zero_d_alloc() -> memref { // CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr @@ -174,7 +176,7 @@ func @aligned_1d_alloc() -> memref<42xf32> { // ----- // CHECK-LABEL: func @static_alloc() -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { -// BAREPTR-LABEL: func @static_alloc() -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { +// BAREPTR-LABEL: func @static_alloc() -> !llvm.ptr { func @static_alloc() -> memref<32x18xf32> { // CHECK: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // CHECK-NEXT: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 @@ -388,3 +390,29 @@ func @static_memref_dim(%static : memref<42x32x15x13x27xf32>) { %4 = dim %static, %c4 : memref<42x32x15x13x27xf32> return } + +// ----- + +// BAREPTR: llvm.func @foo(!llvm.ptr) -> !llvm.ptr +func @foo(memref<10xi8>) -> memref<20xi8> + +// BAREPTR-LABEL: func @check_memref_func_call +// BAREPTR-SAME: %[[in:.*]]: !llvm.ptr) -> !llvm.ptr +func @check_memref_func_call(%in : memref<10xi8>) -> memref<20xi8> { + // BAREPTR: %[[inDesc:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] + // BAREPTR-NEXT: %[[barePtr:.*]] = llvm.extractvalue %[[inDesc]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: %[[call:.*]] = llvm.call @foo(%[[barePtr]]) : (!llvm.ptr) -> !llvm.ptr + // BAREPTR-NEXT: %[[desc0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: %[[desc1:.*]] = llvm.insertvalue %[[call]], %[[desc0]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: %[[desc2:.*]] = llvm.insertvalue %[[call]], %[[desc1]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // BAREPTR-NEXT: %[[desc4:.*]] = llvm.insertvalue %[[c0]], %[[desc2]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: %[[c20:.*]] = llvm.mlir.constant(20 : index) : !llvm.i64 + // BAREPTR-NEXT: %[[desc6:.*]] = llvm.insertvalue %[[c20]], %[[desc4]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // BAREPTR-NEXT: %[[outDesc:.*]] = llvm.insertvalue %[[c1]], %[[desc6]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %res = call @foo(%in) : (memref<10xi8>) -> (memref<20xi8>) + // BAREPTR-NEXT: %[[res:.*]] = llvm.extractvalue %[[outDesc]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // BAREPTR-NEXT: llvm.return %[[res]] : !llvm.ptr + return %res : memref<20xi8> +} From 0527c8749b90cc742077ae9b3def80efde721090 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 29 Sep 2020 15:25:04 -0400 Subject: [PATCH 072/544] [InstCombine] ease alignment restriction for converting masked load to normal load I think we initially made this fold conservative to be safer, but we do not need the alignment attribute/metadata limitation because the masked load intrinsic itself specifies the alignment. A normal vector load is better for IR transforms and should be no worse in codegen than the masked alternative. If it is worse for some target, the backend can reverse this transform. Differential Revision: https://reviews.llvm.org/D88505 --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 5 ++--- llvm/test/Transforms/InstCombine/masked_intrinsics.ll | 5 +++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 90571bd033670..465191b4ae1f7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -289,9 +289,8 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { // If we can unconditionally load from this address, replace with a // load/select idiom. TODO: use DT for context sensitive query - if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment, - II.getModule()->getDataLayout(), &II, - nullptr)) { + if (isDereferenceablePointer(LoadPtr, II.getType(), + II.getModule()->getDataLayout(), &II, nullptr)) { Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, "unmaskedload"); return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index a16f368ddb5cf..684e008cc6bcd 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -100,8 +100,9 @@ define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable ; CHECK-LABEL: @load_speculative_less_aligned( ; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0 ; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]]) -; CHECK-NEXT: ret <2 x double> [[RES]] +; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]] +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %ptv1 = insertelement <2 x double> undef, double %pt, i64 0 %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1 From d689a64edd568e3048eb33976aa148251614d510 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Sep 2020 15:30:42 -0400 Subject: [PATCH 073/544] [libc++][ci] Add a script to describe when to trigger libc++ CI builds --- libcxx/utils/ci/buildkite-pipeline-trigger.sh | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100755 libcxx/utils/ci/buildkite-pipeline-trigger.sh diff --git a/libcxx/utils/ci/buildkite-pipeline-trigger.sh b/libcxx/utils/ci/buildkite-pipeline-trigger.sh new file mode 100755 index 0000000000000..333cc285ab7c3 --- /dev/null +++ b/libcxx/utils/ci/buildkite-pipeline-trigger.sh @@ -0,0 +1,37 @@ +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## + +# +# This file generates a Buildkite pipeline that triggers the libc++ CI +# job(s) if needed. The intended usage of this script is to be piped +# into `buildkite-agent pipeline upload`. +# + +if git diff --name-only HEAD~ | grep -q -E "libcxx/|libcxxabi/"; then + skip="false" +else + skip="The commit does not touch libc++ or libc++abi" +fi + +reviewID="$(git log --format=%B -n 1 | sed -nE 's/^Review-ID:[[:space:]]*(.+)$/\1/p')" +if [[ "${reviewID}" != "" ]]; then + buildMessage="https://llvm.org/${reviewID}" +else + buildMessage="Push to branch ${BUILDKITE_BRANCH}" +fi + +cat < Date: Tue, 29 Sep 2020 15:29:11 -0400 Subject: [PATCH 074/544] [libc++][ci] Improve the phabricator-report script - Detect whether a build has passed more accurately - Retry pushing the status to Phabricator - Allow running on a non-review branch --- libcxx/utils/ci/phabricator-report | 77 +++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/libcxx/utils/ci/phabricator-report b/libcxx/utils/ci/phabricator-report index dffe00ba0b7c8..71bf298e8f535 100755 --- a/libcxx/utils/ci/phabricator-report +++ b/libcxx/utils/ci/phabricator-report @@ -12,12 +12,57 @@ import io import os import phabricator import re +import socket import subprocess import sys import time LLVM_REVIEWS_API = "https://reviews.llvm.org/api/" +def exponentialBackoffRetry(f, exception, maxAttempts=3): + """Tries calling a function, but retry with exponential backoff if the + function fails with the specified exception. + """ + waitTime = 1 + attempts = 0 + while True: + try: + f() + break + except exception as e: + attempts += 1 + if attempts == maxAttempts: + raise e + else: + time.sleep(waitTime) + waitTime *= 2 + +def buildPassed(log): + """ + Tries to guess whether a build has passed or not based on the logs + produced by it. + + This is really hacky -- it would be better to use the status of the + script that runs the tests, however that script is being piped into + this script, so we can't know its exit status. What we do here is + basically look for abnormal CMake or Lit output, but that is tightly + coupled to the specific CI we're running. + """ + # Lit reporting failures + matches = re.findall(r"^\s*Failed\s*:\s*(\d+)$", log, flags=re.MULTILINE) + if matches and any(int(match) > 0 for match in matches): + return False + + # Error while running CMake + if 'CMake Error' in log or 'Configuring incomplete, errors occurred!' in log: + return False + + # Ninja failed to build some target + if 'FAILED:' in log: + return False + + return True + def main(argv): parser = argparse.ArgumentParser( description=""" @@ -31,8 +76,12 @@ with the results of the build. The script is assumed to be running inside a Buildkite agent, and as such, it assumes the existence of several environment variables that are specific -to Buildkite. It also assumes that it is running in a context where the HEAD -commit contains the Phabricator ID of the review to update. +to Buildkite. + +It also assumes that it is running in a context where the HEAD commit contains +the Phabricator ID of the review to update. If the commit does not contain the +Phabricator ID, this script is basically a no-op. This allows running the CI +on commits that are not triggered by a Phabricator review. """) args = parser.parse_args(argv) @@ -60,7 +109,7 @@ commit contains the Phabricator ID of the review to update. # Then, extract information from the environment and post-process the logs. log.seek(0) log = log.read() - result = 'fail' if 'FAILED:' in log else 'pass' + result = 'pass' if buildPassed(log) else 'fail' resultObject = { 'name': '{BUILDKITE_LABEL} ({BUILDKITE_BUILD_URL}#{BUILDKITE_JOB_ID})'.format(**os.environ), 'result': result, @@ -70,15 +119,21 @@ commit contains the Phabricator ID of the review to update. commitMessage = subprocess.check_output(['git', 'log', '--format=%B' , '-n', '1']).decode() phabricatorID = re.search(r'^Phabricator-ID:\s+(.+)$', commitMessage, flags=re.MULTILINE) - if not phabricatorID: - raise RuntimeError('Could not find the Phabricator ID in the commit message. ' - 'The commit message was:\n{}'.format(commitMessage)) - else: - phabricatorID = phabricatorID.group(1) - token = os.environ['CONDUIT_TOKEN'] - phab = phabricator.Phabricator(token=token, host=LLVM_REVIEWS_API) - phab.harbormaster.sendmessage(buildTargetPHID=phabricatorID, type=result, unit=[resultObject]) + # If there's a Phabricator ID in the commit, then the build was triggered + # by a Phabricator review -- update the results back. Otherwise, don't + # do anything. + if phabricatorID: + phabricatorID = phabricatorID.group(1) + token = os.environ['CONDUIT_TOKEN'] + phab = phabricator.Phabricator(token=token, host=LLVM_REVIEWS_API) + exponentialBackoffRetry( + lambda: phab.harbormaster.sendmessage(buildTargetPHID=phabricatorID, type=result, unit=[resultObject]), + exception=socket.timeout + ) + else: + print('The HEAD commit does not appear to be tied to a Phabricator review -- ' + 'not uploading the results to any review.') if __name__ == '__main__': main(sys.argv[1:]) From 5d19eb542db40fc5fe9f37c46246785ba5ae1e10 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Tue, 29 Sep 2020 12:50:38 -0700 Subject: [PATCH 075/544] [lldb/docs] Remove manual codesigning documentation The `macos-setup-codesign.sh` script has been in place for over two years. If there are no known issues, it's a good time to drop the manual steps from the docs. Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D88257 --- lldb/docs/resources/build.rst | 57 +++-------------------------------- 1 file changed, 5 insertions(+), 52 deletions(-) diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index 579f7574dac53..e22db7f6d8f9a 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -575,8 +575,11 @@ Code Signing on macOS To use the in-tree debug server on macOS, lldb needs to be code signed. The Debug, DebugClang and Release builds are set to code sign using a code signing -certificate named ``lldb_codesign``. This document explains how to set up the -signing certificate. +certificate named ``lldb_codesign``. + +Automatic setup, run: + +* ``scripts/macos-setup-codesign.sh`` Note that it's possible to build and use lldb on macOS without setting up code signing by using the system's debug server. To configure lldb in this way with @@ -589,56 +592,6 @@ build folders that contained old signed items. The darwin kernel will cache code signing using the executable's file system node, so you will need to delete the file so the kernel clears its cache. -Automatic setup: - -* Run ``scripts/macos-setup-codesign.sh`` - -Manual setup steps: - -* Launch /Applications/Utilities/Keychain Access.app -* In Keychain Access select the ``login`` keychain in the ``Keychains`` list in - the upper left hand corner of the window. -* Select the following menu item: Keychain Access->Certificate Assistant->Create a Certificate... -* Set the following settings - -:: - - Name = lldb_codesign - Identity Type = Self Signed Root - Certificate Type = Code Signing - -* Click Create -* Click Continue -* Click Done -* Click on the "My Certificates" -* Double click on your new ``lldb_codesign`` certificate -* Turn down the "Trust" disclosure triangle, scroll to the "Code Signing" trust - pulldown menu and select "Always Trust" and authenticate as needed using your - username and password. -* Drag the new ``lldb_codesign`` code signing certificate (not the public or - private keys of the same name) from the ``login`` keychain to the ``System`` - keychain in the Keychains pane on the left hand side of the main Keychain - Access window. This will move this certificate to the ``System`` keychain. - You'll have to authorize a few more times, set it to be "Always trusted" when - asked. -* Remove ``~/Desktop/lldb_codesign.cer`` file on your desktop if there is one. -* In the Keychain Access GUI, click and drag ``lldb_codesign`` in the - ``System`` keychain onto the desktop. The drag will create a - ``Desktop/lldb_codesign.cer`` file used in the next step. -* Switch to Terminal, and run the following: - -:: - - sudo security add-trust -d -r trustRoot -p basic -p codeSign -k /Library/Keychains/System.keychain ~/Desktop/lldb_codesign.cer - rm -f ~/Desktop/lldb_codesign.cer - -* Drag the ``lldb_codesign`` certificate from the ``System`` keychain back into - the ``login`` keychain -* Quit Keychain Access -* Reboot -* Clean by removing all previously creating code signed binaries and rebuild - lldb and you should be able to debug. - When you build your LLDB for the first time, the Xcode GUI will prompt you for permission to use the ``lldb_codesign`` keychain. Be sure to click "Always Allow" on your first build. From here on out, the ``lldb_codesign`` will be From d94253b52eb1ccf08daaa281488b3903396ca9b9 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 24 Sep 2020 11:45:55 -0400 Subject: [PATCH 076/544] [libc++][ci] Turn on Phabricator reporting by default --- libcxx/utils/ci/buildkite-pipeline.yml | 29 +++++++++++--------------- libcxx/utils/ci/run-buildbot.sh | 4 ++-- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index b536e5b6f793f..db9c4c2b9fc83 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -16,66 +16,61 @@ steps: - label: "C++03" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx03" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx03 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++11" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx11" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx11 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++14" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx14" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx14 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++17" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx17" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx17 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++20" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx2a" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx2a | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "-fno-exceptions" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-noexceptions" - agents: - queue: "libcxx-builders" - - - label: "32 bits" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-32bit" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-noexceptions | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "GCC/C++20" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-gcc" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-gcc | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "ASAN" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-asan" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-asan | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "TSAN" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-tsan" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-tsan | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "UBSAN" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-ubsan" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-ubsan | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "With LLVM's libunwind" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-with_llvm_unwinder" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-with_llvm_unwinder | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "Single-threaded" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-singlethreaded" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-singlethreaded | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh index d4972b098a2cb..25cdcc3ee1649 100755 --- a/libcxx/utils/ci/run-buildbot.sh +++ b/libcxx/utils/ci/run-buildbot.sh @@ -123,5 +123,5 @@ ninja -C "${BUILD_DIR}" check-cxx echo "+++ Running the libc++abi tests" ninja -C "${BUILD_DIR}" check-cxxabi -echo "+++ Running the libc++ benchmarks" -ninja -C "${BUILD_DIR}" check-cxx-benchmarks +# echo "+++ Running the libc++ benchmarks" +# ninja -C "${BUILD_DIR}" check-cxx-benchmarks From 92e1ebeaa1fe0e5461327d071c55167733834e60 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Tue, 29 Sep 2020 13:08:22 -0700 Subject: [PATCH 077/544] [trace] Fix destructor declaration The destructor must be defined in the implementation class so that it can be called, as Vedant Kumar pointed out in: ''' What were your thoughts, re: +class Trace : public PluginInterface { +public: + ~Trace() override = default; Does this need to be `virtual ~Trace() = ...`? Otherwise, when a std::shared_ptr is destroyed, the destructor for the derived TraceIntelPT instance won't run. ''' --- lldb/include/lldb/Target/Trace.h | 2 -- lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h index e4e9b1aa88a74..0aa2da7dbad4b 100644 --- a/lldb/include/lldb/Target/Trace.h +++ b/lldb/include/lldb/Target/Trace.h @@ -35,8 +35,6 @@ namespace lldb_private { /// this information. class Trace : public PluginInterface { public: - ~Trace() override = default; - /// Dump the trace data that this plug-in has access to. /// /// This function will dump all of the trace data for all threads in a user diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h index edc781e08ad4a..d221caff3c184 100644 --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h @@ -20,6 +20,8 @@ class TraceIntelPT : public lldb_private::Trace { public: void Dump(lldb_private::Stream *s) const override; + ~TraceIntelPT() override = default; + /// PluginInterface protocol /// \{ lldb_private::ConstString GetPluginName() override; From eff9984dca033af2727ff90e22fbfb3af4ce7d4c Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 29 Sep 2020 20:19:23 +0000 Subject: [PATCH 078/544] Fix TODO in the mlir-cpu-runner/bare_ptr_call_conv.mlir test: call ops in bare-ptr calling convention is supported now (NFC) This was fixed in a89fc12653c. --- .../mlir-cpu-runner/bare_ptr_call_conv.mlir | 177 +++--------------- 1 file changed, 31 insertions(+), 146 deletions(-) diff --git a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir index caa945d11b6cd..6dc8b55762511 100644 --- a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir @@ -32,152 +32,37 @@ func @printF32(%arg0: f32) func @printComma() func @printNewline() -// TODO: 'main' function currently has to be provided in LLVM dialect since -// 'call' op is not yet supported by the bare pointer calling convention. The -// LLVM dialect version was generated using the following loop/std dialect -// version and minor changes around the 'simple_add1_add2_test' call. +func @main() +{ + %c2 = constant 2 : index + %c0 = constant 0 : index + %c1 = constant 1 : index + %cst = constant 1.000000e+00 : f32 + %cst_0 = constant 2.000000e+00 : f32 + %a = alloc() : memref<2xf32> + %b = alloc() : memref<2xf32> + scf.for %i = %c0 to %c2 step %c1 { + store %cst, %a[%i] : memref<2xf32> + store %cst, %b[%i] : memref<2xf32> + } + + call @simple_add1_add2_test(%a, %b) : (memref<2xf32>, memref<2xf32>) -> () -//func @main() -//{ -// %c2 = constant 2 : index -// %c0 = constant 0 : index -// %c1 = constant 1 : index -// %cst = constant 1.000000e+00 : f32 -// %cst_0 = constant 2.000000e+00 : f32 -// %a = alloc() : memref<2xf32> -// %b = alloc() : memref<2xf32> -// scf.for %i = %c0 to %c2 step %c1 { -// store %cst, %a[%i] : memref<2xf32> -// store %cst, %b[%i] : memref<2xf32> -// } -// -// call @simple_add1_add2_test(%a, %b) : (memref<2xf32>, memref<2xf32>) -> () -// -// %l0 = load %a[%c0] : memref<2xf32> -// call @printF32(%l0) : (f32) -> () -// call @printComma() : () -> () -// %l1 = load %a[%c1] : memref<2xf32> -// call @printF32(%l1) : (f32) -> () -// call @printNewline() : () -> () -// -// %l2 = load %b[%c0] : memref<2xf32> -// call @printF32(%l2) : (f32) -> () -// call @printComma() : () -> () -// %l3 = load %b[%c1] : memref<2xf32> -// call @printF32(%l3) : (f32) -> () -// call @printNewline() : () -> () -// -// dealloc %a : memref<2xf32> -// dealloc %b : memref<2xf32> -// return -//} + %l0 = load %a[%c0] : memref<2xf32> + call @printF32(%l0) : (f32) -> () + call @printComma() : () -> () + %l1 = load %a[%c1] : memref<2xf32> + call @printF32(%l1) : (f32) -> () + call @printNewline() : () -> () -llvm.func @main() { - %0 = llvm.mlir.constant(2 : index) : !llvm.i64 - %1 = llvm.mlir.constant(0 : index) : !llvm.i64 - %2 = llvm.mlir.constant(1 : index) : !llvm.i64 - %3 = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float - %4 = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float - %5 = llvm.mlir.constant(2 : index) : !llvm.i64 - %6 = llvm.mlir.null : !llvm.ptr - %7 = llvm.mlir.constant(1 : index) : !llvm.i64 - %8 = llvm.getelementptr %6[%7] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - %9 = llvm.ptrtoint %8 : !llvm.ptr to !llvm.i64 - %10 = llvm.mul %5, %9 : !llvm.i64 - %11 = llvm.call @malloc(%10) : (!llvm.i64) -> !llvm.ptr - %12 = llvm.bitcast %11 : !llvm.ptr to !llvm.ptr - %13 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %14 = llvm.insertvalue %12, %13[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %15 = llvm.insertvalue %12, %14[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %16 = llvm.mlir.constant(0 : index) : !llvm.i64 - %17 = llvm.insertvalue %16, %15[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %18 = llvm.mlir.constant(1 : index) : !llvm.i64 - %19 = llvm.insertvalue %5, %17[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %20 = llvm.insertvalue %18, %19[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %21 = llvm.mlir.constant(2 : index) : !llvm.i64 - %22 = llvm.mlir.null : !llvm.ptr - %23 = llvm.mlir.constant(1 : index) : !llvm.i64 - %24 = llvm.getelementptr %22[%23] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - %25 = llvm.ptrtoint %24 : !llvm.ptr to !llvm.i64 - %26 = llvm.mul %21, %25 : !llvm.i64 - %27 = llvm.call @malloc(%26) : (!llvm.i64) -> !llvm.ptr - %28 = llvm.bitcast %27 : !llvm.ptr to !llvm.ptr - %29 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %30 = llvm.insertvalue %28, %29[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %31 = llvm.insertvalue %28, %30[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %32 = llvm.mlir.constant(0 : index) : !llvm.i64 - %33 = llvm.insertvalue %32, %31[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %34 = llvm.mlir.constant(1 : index) : !llvm.i64 - %35 = llvm.insertvalue %21, %33[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %36 = llvm.insertvalue %34, %35[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - llvm.br ^bb1(%1 : !llvm.i64) -^bb1(%37: !llvm.i64): // 2 preds: ^bb0, ^bb2 - %38 = llvm.icmp "slt" %37, %0 : !llvm.i64 - llvm.cond_br %38, ^bb2, ^bb3 -^bb2: // pred: ^bb1 - %39 = llvm.extractvalue %20[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %40 = llvm.mlir.constant(0 : index) : !llvm.i64 - %41 = llvm.mlir.constant(1 : index) : !llvm.i64 - %42 = llvm.mul %37, %41 : !llvm.i64 - %43 = llvm.add %40, %42 : !llvm.i64 - %44 = llvm.getelementptr %39[%43] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - llvm.store %3, %44 : !llvm.ptr - %45 = llvm.extractvalue %36[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %46 = llvm.mlir.constant(0 : index) : !llvm.i64 - %47 = llvm.mlir.constant(1 : index) : !llvm.i64 - %48 = llvm.mul %37, %47 : !llvm.i64 - %49 = llvm.add %46, %48 : !llvm.i64 - %50 = llvm.getelementptr %45[%49] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - llvm.store %3, %50 : !llvm.ptr - %51 = llvm.add %37, %2 : !llvm.i64 - llvm.br ^bb1(%51 : !llvm.i64) -^bb3: // pred: ^bb1 - %52 = llvm.mlir.constant(1 : index) : !llvm.i64 - %53 = llvm.mlir.constant(1 : index) : !llvm.i64 - %54 = llvm.extractvalue %20[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %55 = llvm.extractvalue %36[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - llvm.call @simple_add1_add2_test(%54, %55) : (!llvm.ptr, !llvm.ptr) -> () - %56 = llvm.extractvalue %20[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %57 = llvm.mlir.constant(0 : index) : !llvm.i64 - %58 = llvm.mlir.constant(1 : index) : !llvm.i64 - %59 = llvm.mul %1, %58 : !llvm.i64 - %60 = llvm.add %57, %59 : !llvm.i64 - %61 = llvm.getelementptr %56[%60] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - %62 = llvm.load %61 : !llvm.ptr - llvm.call @printF32(%62) : (!llvm.float) -> () - llvm.call @printComma() : () -> () - %63 = llvm.extractvalue %20[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %64 = llvm.mlir.constant(0 : index) : !llvm.i64 - %65 = llvm.mlir.constant(1 : index) : !llvm.i64 - %66 = llvm.mul %2, %65 : !llvm.i64 - %67 = llvm.add %64, %66 : !llvm.i64 - %68 = llvm.getelementptr %63[%67] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - %69 = llvm.load %68 : !llvm.ptr - llvm.call @printF32(%69) : (!llvm.float) -> () - llvm.call @printNewline() : () -> () - %70 = llvm.extractvalue %36[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %71 = llvm.mlir.constant(0 : index) : !llvm.i64 - %72 = llvm.mlir.constant(1 : index) : !llvm.i64 - %73 = llvm.mul %1, %72 : !llvm.i64 - %74 = llvm.add %71, %73 : !llvm.i64 - %75 = llvm.getelementptr %70[%74] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - %76 = llvm.load %75 : !llvm.ptr - llvm.call @printF32(%76) : (!llvm.float) -> () - llvm.call @printComma() : () -> () - %77 = llvm.extractvalue %36[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %78 = llvm.mlir.constant(0 : index) : !llvm.i64 - %79 = llvm.mlir.constant(1 : index) : !llvm.i64 - %80 = llvm.mul %2, %79 : !llvm.i64 - %81 = llvm.add %78, %80 : !llvm.i64 - %82 = llvm.getelementptr %77[%81] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr - %83 = llvm.load %82 : !llvm.ptr - llvm.call @printF32(%83) : (!llvm.float) -> () - llvm.call @printNewline() : () -> () - %84 = llvm.extractvalue %20[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %85 = llvm.bitcast %84 : !llvm.ptr to !llvm.ptr - llvm.call @free(%85) : (!llvm.ptr) -> () - %86 = llvm.extractvalue %36[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %87 = llvm.bitcast %86 : !llvm.ptr to !llvm.ptr - llvm.call @free(%87) : (!llvm.ptr) -> () - llvm.return + %l2 = load %b[%c0] : memref<2xf32> + call @printF32(%l2) : (f32) -> () + call @printComma() : () -> () + %l3 = load %b[%c1] : memref<2xf32> + call @printF32(%l3) : (f32) -> () + call @printNewline() : () -> () + + dealloc %a : memref<2xf32> + dealloc %b : memref<2xf32> + return } From d0667562e127925fb845124302ae31f10b3a849d Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Sep 2020 16:21:20 -0400 Subject: [PATCH 079/544] [libc++] Fix some test failures in unusual configurations --- .../facet.num.put.members/put_long_double.pass.cpp | 2 +- libcxxabi/test/incomplete_type.sh.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp index 0740271292742..fcb7aafa48ac9 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp @@ -24415,7 +24415,7 @@ void test12() output_iterator iter; std::locale lc = std::locale::classic(); std::locale lg(lc, new my_numpunct); -#ifdef __APPLE__ +#if defined(__APPLE__) && defined(__x86_64__) // This test is failing on FreeBSD, possibly due to different representations // of the floating point numbers. const my_facet f(1); diff --git a/libcxxabi/test/incomplete_type.sh.cpp b/libcxxabi/test/incomplete_type.sh.cpp index 41d003a755953..fba24b2092563 100644 --- a/libcxxabi/test/incomplete_type.sh.cpp +++ b/libcxxabi/test/incomplete_type.sh.cpp @@ -14,6 +14,7 @@ // addresses. // UNSUPPORTED: no-exceptions +// UNSUPPORTED: -fno-rtti // NOTE: Link libc++abi explicitly and before libc++ so that libc++ doesn't drag // in the system libc++abi installation on OS X. (DYLD_LIBRARY_PATH is ignored From 3c7070f1a6b89277fce042a943cd83fa65507a67 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 13:26:09 -0700 Subject: [PATCH 080/544] [lldb] Hoist --server argument out of LLDB_TEST_COMMON_ARGS (NFC) Give the server argument its own variable (LLDB_TEST_SERVER) so that we can configure it in lit.site.cfg.py if we so desire. --- lldb/test/API/CMakeLists.txt | 6 ++++-- lldb/test/API/lit.cfg.py | 3 +++ lldb/test/API/lit.site.cfg.py.in | 2 ++ lldb/utils/lldb-dotest/CMakeLists.txt | 5 +++++ lldb/utils/lldb-dotest/lldb-dotest.in | 6 +++++- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index 6c7f54e39123c..fe92012e37678 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -119,12 +119,12 @@ if(CMAKE_HOST_APPLE) elseif(TARGET debugserver) set(debugserver_path ${LLVM_RUNTIME_OUTPUT_INTDIR}/debugserver) message(STATUS "LLDB Tests use just-built debugserver: ${debugserver_path}") - list(APPEND LLDB_TEST_COMMON_ARGS --server ${debugserver_path}) + set(LLDB_TEST_SERVER ${debugserver_path}) add_lldb_test_dependency(debugserver) elseif(TARGET lldb-server) set(lldb_server_path ${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb-server) message(STATUS "LLDB Tests use just-built lldb-server: ${lldb_server_path}") - list(APPEND LLDB_TEST_COMMON_ARGS --server ${lldb_server_path}) + set(LLDB_TEST_SERVER ${lldb_server_path}) add_lldb_test_dependency(lldb-server) else() message(WARNING "LLDB Tests enabled, but no server available") @@ -146,6 +146,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_FILECHECK "${LLDB_TEST_FILECHECK}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_YAML2OBJ "${LLDB_TEST_YAML2OBJ}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_SERVER "${LLDB_TEST_SERVER}") # Remaining ones must be paths to the provided LLVM build-tree. if(LLVM_CONFIGURATION_TYPES) @@ -174,6 +175,7 @@ string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_COMPILER string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_FILECHECK "${LLDB_TEST_FILECHECK}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_YAML2OBJ "${LLDB_TEST_YAML2OBJ}") +string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_SERVER "${LLDB_TEST_SERVER}") # Configure the API test suite. configure_lit_site_cfg( diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 893418cc16d3a..d78a1aae54675 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -200,6 +200,9 @@ def delete_module_cache(path): if is_configured('yaml2obj'): dotest_cmd += ['--yaml2obj', config.yaml2obj] +if is_configured('server'): + dotest_cmd += ['--server', config.server] + if is_configured('lldb_libs_dir'): dotest_cmd += ['--lldb-libs-dir', config.lldb_libs_dir] diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 144d17965b9ad..271faf371f9d1 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -31,6 +31,7 @@ config.test_compiler = '@LLDB_TEST_COMPILER@' config.dsymutil = '@LLDB_TEST_DSYMUTIL@' config.filecheck = '@LLDB_TEST_FILECHECK@' config.yaml2obj = '@LLDB_TEST_YAML2OBJ@' +config.server = '@LLDB_TEST_SERVER@' # The API tests use their own module caches. config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") @@ -59,6 +60,7 @@ try: config.dsymutil = config.dsymutil % lit_config.params config.filecheck = config.filecheck % lit_config.params config.yaml2obj = config.yaml2obj % lit_config.params + config.server = config.server % lit_config.params config.lldb_framework_dir = config.lldb_framework_dir % lit_config.params config.dotest_args_str = config.dotest_args_str % lit_config.params except KeyError as e: diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 2f9ba72d7b223..1001fbf04ebe7 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -28,6 +28,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_SERVER_CONFIGURED "${LLDB_TEST_SERVER}") # Remaining ones must be paths to the provided LLVM build-tree. if(${config_type} IN_LIST LLVM_CONFIGURATION_TYPES) @@ -41,6 +42,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_SERVER_CONFIGURED "${LLDB_TEST_SERVER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") else() # Single-configuration generator like Ninja. @@ -53,6 +55,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_SERVER_CONFIGURED "${LLDB_TEST_SERVER}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") endif() @@ -73,6 +76,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_SERVER_CONFIGURED "${LLDB_TEST_SERVER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") configure_file( @@ -90,6 +94,7 @@ else() set(LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") set(LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") set(LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") + set(LLDB_TEST_SERVER_CONFIGURED "${LLDB_TEST_SERVER}") set(LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") configure_file( diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in index fedb56e938fe4..cfd73f5b32a6e 100755 --- a/lldb/utils/lldb-dotest/lldb-dotest.in +++ b/lldb/utils/lldb-dotest/lldb-dotest.in @@ -11,6 +11,7 @@ compiler = '@LLDB_TEST_COMPILER_CONFIGURED@' dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@' filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@' yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@' +server = '@LLDB_TEST_SERVER_CONFIGURED@' lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@" lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@" @@ -29,7 +30,10 @@ if __name__ == '__main__': cmd.extend(['--yaml2obj', yaml2obj]) cmd.extend(['--filecheck', filecheck]) cmd.extend(['--lldb-libs-dir', lldb_libs_dir]) - cmd.extend(['--framework', lldb_framework_dir]) + if server: + cmd.extend(['--server', server]) + if lldb_framework_dir: + cmd.extend(['--framework', lldb_framework_dir]) if lldb_build_intel_pt == "1": cmd.extend(['--enable-plugin', 'intel-pt']) cmd.extend(wrapper_args) From f9e70fa546a461b3c9fa8463efcd9f7fe989bd9f Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Sep 2020 16:28:41 -0400 Subject: [PATCH 081/544] [libc++] Rename the -fno-rtti Lit feature to just no-rtti This is consistent to the way we name other Lit features, and it removes the possibility for confusing the Lit feature with the actual compiler flag. --- .../support.rtti/type.info/type_info.comparison.merged.sh.cpp | 2 +- .../type.info/type_info.comparison.unmerged.sh.cpp | 2 +- .../support.rtti/type.info/type_info.pass.cpp | 2 +- .../support.rtti/type.info/type_info_hash.pass.cpp | 2 +- .../std/utilities/any/any.class/any.observers/type.pass.cpp | 2 +- .../func.wrap.func/func.wrap.func.targ/target.pass.cpp | 2 +- .../func.wrap.func/func.wrap.func.targ/target_type.pass.cpp | 2 +- .../util.smartptr.getdeleter/get_deleter.pass.cpp | 2 +- .../util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp | 2 +- .../std/utilities/type.index/type.index.hash/hash.pass.cpp | 2 +- .../std/utilities/type.index/type.index.members/ctor.pass.cpp | 2 +- .../std/utilities/type.index/type.index.members/eq.pass.cpp | 2 +- .../type.index/type.index.members/hash_code.pass.cpp | 2 +- .../std/utilities/type.index/type.index.members/lt.pass.cpp | 2 +- .../std/utilities/type.index/type.index.members/name.pass.cpp | 2 +- .../type.index/type.index.overview/copy_assign.pass.cpp | 2 +- .../type.index/type.index.overview/copy_ctor.pass.cpp | 2 +- .../type.index/type.index.synopsis/hash_type_index.pass.cpp | 2 +- .../test.support/test_macros_header.no_rtti.verify.cpp | 4 ++-- .../support/test.support/test_macros_header.rtti.pass.cpp | 4 ++-- libcxx/utils/libcxx/test/params.py | 2 +- libcxxabi/test/incomplete_type.sh.cpp | 2 +- 22 files changed, 24 insertions(+), 24 deletions(-) diff --git a/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.merged.sh.cpp b/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.merged.sh.cpp index ae6efea8b17ba..e4cf05ac72c0d 100644 --- a/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.merged.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.merged.sh.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti // RUN: %{cxx} %s %{flags} %{compile_flags} -c -o %t.tu1.o -DTU1 -D_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=1 // RUN: %{cxx} %s %{flags} %{compile_flags} -c -o %t.tu2.o -DTU2 -D_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=1 diff --git a/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.unmerged.sh.cpp b/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.unmerged.sh.cpp index 1fb9bc3a9b418..3ce47dbb02961 100644 --- a/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.unmerged.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.rtti/type.info/type_info.comparison.unmerged.sh.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti // RUN: %{cxx} %s %{flags} %{compile_flags} -c -o %t.tu1.o -DTU1 -D_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=2 // RUN: %{cxx} %s %{flags} %{compile_flags} -c -o %t.tu2.o -DTU2 -D_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=2 diff --git a/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp b/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp index ce169087ea554..2422db658964c 100644 --- a/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp +++ b/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp @@ -8,7 +8,7 @@ // test type_info -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp b/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp index ea47e2f145f3d..f173495dac7db 100644 --- a/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp +++ b/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp @@ -8,7 +8,7 @@ // test type_info -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp index 4a355c29cd1d9..3c951f81143d2 100644 --- a/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14 -// XFAIL: -fno-rtti +// XFAIL: no-rtti // diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp index cc238ddef5fe1..5a2b3647c6e18 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp @@ -22,7 +22,7 @@ // This test runs in C++03, but we have deprecated using std::function in C++03. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp index ca3a6a566f139..d6b3bf3003c9a 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp @@ -15,7 +15,7 @@ // This test runs in C++03, but we have deprecated using std::function in C++03. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp index 0da513d256dbd..bc0b87613b95a 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp @@ -12,7 +12,7 @@ // template D* get_deleter(const shared_ptr& p); -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp index 77eb96dd97bd8..6059ff8b562fe 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp @@ -12,7 +12,7 @@ // template shared_ptr dynamic_pointer_cast(const shared_ptr& r); -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp index 35ba51723c89d..aae9472856607 100644 --- a/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp @@ -17,7 +17,7 @@ // size_t operator()(type_index index) const; // }; -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp index e1ed6b8c0a7f8..202d818b96e69 100644 --- a/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp @@ -12,7 +12,7 @@ // type_index(const type_info& rhs); -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp index 5224183ac1c9d..f64e2a7cb6c83 100644 --- a/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp @@ -13,7 +13,7 @@ // bool operator==(const type_index& rhs) const; // bool operator!=(const type_index& rhs) const; -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp index 6c99038435d89..0311d4e00e7ca 100644 --- a/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp @@ -12,7 +12,7 @@ // size_t hash_code() const; -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp index 1a96ca0f6ce00..360793864048c 100644 --- a/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp @@ -15,7 +15,7 @@ // bool operator> (const type_index& rhs) const; // bool operator>=(const type_index& rhs) const; -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp index e03a9262c6904..110159355bad7 100644 --- a/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp @@ -12,7 +12,7 @@ // const char* name() const; -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp index 80eeef6eee055..7260f4282d4b1 100644 --- a/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp @@ -12,7 +12,7 @@ // type_index& operator=(const type_index& ti); -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp index 6a2c47c5e5764..f55263518269b 100644 --- a/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp @@ -12,7 +12,7 @@ // type_index(const type_index& ti); -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp index 97b0aa4ed9a70..70a2e80b656c6 100644 --- a/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp @@ -14,7 +14,7 @@ // size_t operator()(type_index index) const; // }; -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include #include diff --git a/libcxx/test/support/test.support/test_macros_header.no_rtti.verify.cpp b/libcxx/test/support/test.support/test_macros_header.no_rtti.verify.cpp index 4be5e9046de9b..a7f35577c6beb 100644 --- a/libcxx/test/support/test.support/test_macros_header.no_rtti.verify.cpp +++ b/libcxx/test/support/test.support/test_macros_header.no_rtti.verify.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -// Make sure the TEST_HAS_NO_RTTI macro is defined when the -fno-rtti feature +// Make sure the TEST_HAS_NO_RTTI macro is defined when the no-rtti Lit feature // is defined. -// REQUIRES: -fno-rtti +// REQUIRES: no-rtti #include "test_macros.h" diff --git a/libcxx/test/support/test.support/test_macros_header.rtti.pass.cpp b/libcxx/test/support/test.support/test_macros_header.rtti.pass.cpp index 4c602a50b54a7..ff8271cdd920d 100644 --- a/libcxx/test/support/test.support/test_macros_header.rtti.pass.cpp +++ b/libcxx/test/support/test.support/test_macros_header.rtti.pass.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -// Make sure the TEST_HAS_NO_RTTI macro is NOT defined when the -fno-rtti +// Make sure the TEST_HAS_NO_RTTI macro is NOT defined when the no-rtti Lit // feature isn't defined. -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti #include "test_macros.h" diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index 773890b015097..175074a169b63 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -27,7 +27,7 @@ Parameter(name='enable_rtti', choices=[True, False], type=bool, default=True, help="Whether to enable RTTI when compiling the test suite.", feature=lambda rtti: None if rtti else - Feature(name='-fno-rtti', compileFlag='-fno-rtti')), + Feature(name='no-rtti', compileFlag='-fno-rtti')), Parameter(name='stdlib', choices=['libc++', 'libstdc++', 'msvc'], type=str, default='libc++', help="The C++ Standard Library implementation being tested.", diff --git a/libcxxabi/test/incomplete_type.sh.cpp b/libcxxabi/test/incomplete_type.sh.cpp index fba24b2092563..5521a1092863e 100644 --- a/libcxxabi/test/incomplete_type.sh.cpp +++ b/libcxxabi/test/incomplete_type.sh.cpp @@ -14,7 +14,7 @@ // addresses. // UNSUPPORTED: no-exceptions -// UNSUPPORTED: -fno-rtti +// UNSUPPORTED: no-rtti // NOTE: Link libc++abi explicitly and before libc++ so that libc++ doesn't drag // in the system libc++abi installation on OS X. (DYLD_LIBRARY_PATH is ignored From 538762fef0b662048be2a261ebc12da249efa977 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Sep 2020 16:27:51 -0400 Subject: [PATCH 082/544] Better diagnostics for anonymous bit-fields with attributes or an initializer. The current C++ grammar allows an anonymous bit-field with an attribute, but this is ambiguous (the attribute in that case could appertain to the type instead of the bit-field). The current thinking in the Core Working Group is that it's better to disallow attributes in that position at the grammar level so that the ambiguity resolves in favor of applying to the type. During discussions about the behavior of the attribute, the Core Working Group also felt it was better to disallow anonymous bit-fields from specifying a default member initializer. This implements both sets of related grammar changes. --- .../clang/Basic/DiagnosticParseKinds.td | 2 ++ .../clang/Basic/DiagnosticSemaKinds.td | 2 -- clang/lib/Parse/ParseDecl.cpp | 7 +++++- clang/lib/Parse/ParseDeclCXX.cpp | 24 ++++++++++++++++--- clang/lib/Sema/SemaDecl.cpp | 8 ------- clang/test/CXX/class/class.bit/p1.cpp | 19 +++++++++++++++ clang/test/Parser/MicrosoftExtensions.cpp | 2 +- clang/test/Parser/c2x-attributes.c | 3 +++ 8 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 clang/test/CXX/class/class.bit/p1.cpp diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index da4e1725269ff..78d3a08b30287 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -872,6 +872,8 @@ def ext_bitfield_member_init: ExtWarn< def warn_cxx17_compat_bitfield_member_init: Warning< "default member initializer for bit-field is incompatible with " "C++ standards before C++20">, InGroup, DefaultIgnore; +def err_anon_bitfield_member_init : Error< + "anonymous bit-field cannot have a default member initializer">; def err_incomplete_array_member_init: Error< "array bound cannot be deduced from a default member initializer">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ed11e0d1ce3c2..4562d1114694f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5619,8 +5619,6 @@ def err_bitfield_width_exceeds_type_width : Error< def err_anon_bitfield_width_exceeds_type_width : Error< "width of anonymous bit-field (%0 bits) exceeds %select{width|size}1 " "of its type (%2 bit%s2)">; -def err_anon_bitfield_init : Error< - "anonymous bit-field cannot have a default member initializer">; def err_incorrect_number_of_vector_initializers : Error< "number of elements must be either one or match the size of the vector">; diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index adec7c6076823..3f314c59ade6e 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4113,8 +4113,13 @@ void Parser::ParseStructDeclaration( DeclaratorInfo.D.setCommaLoc(CommaLoc); // Attributes are only allowed here on successive declarators. - if (!FirstDeclarator) + if (!FirstDeclarator) { + // However, this does not apply for [[]] attributes (which could show up + // before or after the __attribute__ attributes). + DiagnoseAndSkipCXX11Attributes(); MaybeParseGNUAttributes(DeclaratorInfo.D); + DiagnoseAndSkipCXX11Attributes(); + } /// struct-declarator: declarator /// struct-declarator: declarator[opt] ':' constant-expression diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 059d875d683df..a903896f172c5 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -2305,10 +2305,15 @@ bool Parser::ParseCXXMemberDeclaratorBeforeInitializer( Declarator &DeclaratorInfo, VirtSpecifiers &VS, ExprResult &BitfieldSize, LateParsedAttrList &LateParsedAttrs) { // member-declarator: - // declarator pure-specifier[opt] + // declarator virt-specifier-seq[opt] pure-specifier[opt] // declarator requires-clause // declarator brace-or-equal-initializer[opt] - // identifier[opt] ':' constant-expression + // identifier attribute-specifier-seq[opt] ':' constant-expression + // brace-or-equal-initializer[opt] + // ':' constant-expression + // + // NOTE: the latter two productions are a proposed bugfix rather than the + // current grammar rules as of C++20. if (Tok.isNot(tok::colon)) ParseDeclarator(DeclaratorInfo); else @@ -2342,7 +2347,11 @@ bool Parser::ParseCXXMemberDeclaratorBeforeInitializer( } // If attributes exist after the declarator, but before an '{', parse them. + // However, this does not apply for [[]] attributes (which could show up + // before or after the __attribute__ attributes). + DiagnoseAndSkipCXX11Attributes(); MaybeParseGNUAttributes(DeclaratorInfo, &LateParsedAttrs); + DiagnoseAndSkipCXX11Attributes(); // For compatibility with code written to older Clang, also accept a // virt-specifier *after* the GNU attributes. @@ -2784,7 +2793,12 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, InClassInitStyle HasInClassInit = ICIS_NoInit; bool HasStaticInitializer = false; if (Tok.isOneOf(tok::equal, tok::l_brace) && PureSpecLoc.isInvalid()) { - if (DeclaratorInfo.isDeclarationOfFunction()) { + // DRXXXX: Anonymous bit-fields cannot have a brace-or-equal-initializer. + if (BitfieldSize.isUsable() && !DeclaratorInfo.hasName()) { + // Diagnose the error and pretend there is no in-class initializer. + Diag(Tok, diag::err_anon_bitfield_member_init); + SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch); + } else if (DeclaratorInfo.isDeclarationOfFunction()) { // It's a pure-specifier. if (!TryConsumePureSpecifier(/*AllowFunctionDefinition*/ false)) // Parse it as an expression so that Sema can diagnose it. @@ -2933,7 +2947,11 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, DeclaratorInfo.setCommaLoc(CommaLoc); // GNU attributes are allowed before the second and subsequent declarator. + // However, this does not apply for [[]] attributes (which could show up + // before or after the __attribute__ attributes). + DiagnoseAndSkipCXX11Attributes(); MaybeParseGNUAttributes(DeclaratorInfo); + DiagnoseAndSkipCXX11Attributes(); if (ParseCXXMemberDeclaratorBeforeInitializer( DeclaratorInfo, VS, BitfieldSize, LateParsedAttrs)) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index aff49b7ddb903..1c3c484196803 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -16708,14 +16708,6 @@ FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T, BitWidth = nullptr; ZeroWidth = false; } - - // Only data members can have in-class initializers. - if (BitWidth && !II && InitStyle) { - Diag(Loc, diag::err_anon_bitfield_init); - InvalidDecl = true; - BitWidth = nullptr; - ZeroWidth = false; - } } // Check that 'mutable' is consistent with the type of the declaration. diff --git a/clang/test/CXX/class/class.bit/p1.cpp b/clang/test/CXX/class/class.bit/p1.cpp new file mode 100644 index 0000000000000..ab15e3a356cfc --- /dev/null +++ b/clang/test/CXX/class/class.bit/p1.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s + +// Test various bit-field member declarations. +constexpr int foo() { return 1; } +struct A { + int a [[]] : 1; + int b, [[]] : 0; // expected-error {{an attribute list cannot appear here}} + int [[]] : 0; // OK, attribute applies to the type. + int [[]] c : 1; // OK, attribute applies to the type. + int : 2 = 1; // expected-error {{anonymous bit-field cannot have a default member initializer}} + int : 0 { 1 }; // expected-error {{anonymous bit-field cannot have a default member initializer}} + int : 0, d : 1 = 1; + int : 1 = 12, e : 1; // expected-error {{anonymous bit-field cannot have a default member initializer}} + int : 0, f : 1 = 1; + int g [[]] : 1 = 1; + int h [[]] : 1 {1}; + int i : foo() = foo(); + int j, [[]] k; // expected-error {{an attribute list cannot appear here}} +}; diff --git a/clang/test/Parser/MicrosoftExtensions.cpp b/clang/test/Parser/MicrosoftExtensions.cpp index 52f40677a1350..9706eecce2db2 100644 --- a/clang/test/Parser/MicrosoftExtensions.cpp +++ b/clang/test/Parser/MicrosoftExtensions.cpp @@ -466,6 +466,6 @@ namespace enum_class { // MSVC produces a "C4353 constant 0 as function expression" for this, // considering the final {} to be part of the bit-width. We follow P0683R1 // and treat it as a default member initializer. - enum E : int : int{}{}; // expected-error {{anonymous bit-field cannot have a default member initializer}} expected-warning {{C++20 extension}} + enum E : int : int{}{}; // expected-error {{anonymous bit-field cannot have a default member initializer}} }; } diff --git a/clang/test/Parser/c2x-attributes.c b/clang/test/Parser/c2x-attributes.c index 97f17ad4e7c1e..393506e867fec 100644 --- a/clang/test/Parser/c2x-attributes.c +++ b/clang/test/Parser/c2x-attributes.c @@ -23,6 +23,9 @@ struct [[]] S1 { int l[[]][10]; [[]] int m, n; int o [[]] : 12; + int [[]] : 0; // OK, attribute applies to the type. + int p, [[]] : 0; // expected-error {{an attribute list cannot appear here}} + int q, [[]] r; // expected-error {{an attribute list cannot appear here}} }; [[]] struct S2 { int a; }; // expected-error {{misplaced attributes}} From 155d2d5300551c6ac26eaeef259d73d62f939d0b Mon Sep 17 00:00:00 2001 From: Chris Hamilton Date: Tue, 29 Sep 2020 22:34:46 +0200 Subject: [PATCH 083/544] Revert "[Sema] Address-space sensitive check for unbounded arrays (v2)" This reverts commit d9ee935679e7164d1c47e351bbbcf5c25742b59c. --- .../clang/Basic/DiagnosticSemaKinds.td | 8 -- clang/lib/Sema/SemaChecking.cpp | 89 +++---------------- clang/test/Sema/const-eval.c | 8 +- clang/test/Sema/unbounded-array-bounds.c | 80 ----------------- .../SemaCXX/constant-expression-cxx1y.cpp | 3 +- 5 files changed, 18 insertions(+), 170 deletions(-) delete mode 100644 clang/test/Sema/unbounded-array-bounds.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4562d1114694f..f29eec316971d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8915,14 +8915,6 @@ def warn_array_index_precedes_bounds : Warning< def warn_array_index_exceeds_bounds : Warning< "array index %0 is past the end of the array (which contains %1 " "element%s2)">, InGroup; -def warn_ptr_arith_exceeds_max_addressable_bounds : Warning< - "the pointer incremented by %0 refers past the last possible element for an array in %1-bit " - "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, - InGroup; -def warn_array_index_exceeds_max_addressable_bounds : Warning< - "array index %0 refers past the last possible element for an array in %1-bit " - "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">, - InGroup; def note_array_declared_here : Note< "array %0 declared here">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a5de6a5c88db9..eeb3222624005 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14057,11 +14057,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, const ConstantArrayType *ArrayTy = Context.getAsConstantArrayType(BaseExpr->getType()); - const Type *BaseType = - ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr(); - bool IsUnboundedArray = (BaseType == nullptr); - if (EffectiveType->isDependentType() || - (!IsUnboundedArray && BaseType->isDependentType())) + if (!ArrayTy) + return; + + const Type *BaseType = ArrayTy->getElementType().getTypePtr(); + if (EffectiveType->isDependentType() || BaseType->isDependentType()) return; Expr::EvalResult Result; @@ -14069,10 +14069,8 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, return; llvm::APSInt index = Result.Val.getInt(); - if (IndexNegated) { - index.setIsUnsigned(false); + if (IndexNegated) index = -index; - } const NamedDecl *ND = nullptr; if (const DeclRefExpr *DRE = dyn_cast(BaseExpr)) @@ -14080,69 +14078,6 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (const MemberExpr *ME = dyn_cast(BaseExpr)) ND = ME->getMemberDecl(); - if (IsUnboundedArray) { - if (index.isUnsigned() || !index.isNegative()) { - const auto &ASTC = getASTContext(); - unsigned AddrBits = - ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace( - EffectiveType->getCanonicalTypeInternal())); - if (index.getBitWidth() < AddrBits) - index = index.zext(AddrBits); - CharUnits ElemCharUnits = ASTC.getTypeSizeInChars(EffectiveType); - llvm::APInt ElemBytes(index.getBitWidth(), ElemCharUnits.getQuantity()); - // If index has more active bits than address space, we already know - // we have a bounds violation to warn about. Otherwise, compute - // address of (index + 1)th element, and warn about bounds violation - // only if that address exceeds address space. - if (index.getActiveBits() <= AddrBits) { - bool Overflow; - llvm::APInt Product(index); - Product += 1; - Product = Product.umul_ov(ElemBytes, Overflow); - if (!Overflow && Product.getActiveBits() <= AddrBits) - return; - } - - // Need to compute max possible elements in address space, since that - // is included in diag message. - llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits); - MaxElems = MaxElems.zext(std::max(AddrBits + 1, ElemBytes.getBitWidth())); - MaxElems += 1; - ElemBytes = ElemBytes.zextOrTrunc(MaxElems.getBitWidth()); - MaxElems = MaxElems.udiv(ElemBytes); - - unsigned DiagID = - ASE ? diag::warn_array_index_exceeds_max_addressable_bounds - : diag::warn_ptr_arith_exceeds_max_addressable_bounds; - - // Diag message shows element size in bits and in "bytes" (platform- - // dependent CharUnits) - DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr, - PDiag(DiagID) - << index.toString(10, true) << AddrBits - << (unsigned)ASTC.toBits(ElemCharUnits) - << ElemBytes.toString(10, false) - << MaxElems.toString(10, false) - << (unsigned)MaxElems.getLimitedValue(~0U) - << IndexExpr->getSourceRange()); - - if (!ND) { - // Try harder to find a NamedDecl to point at in the note. - while (const auto *ASE = dyn_cast(BaseExpr)) - BaseExpr = ASE->getBase()->IgnoreParenCasts(); - if (const auto *DRE = dyn_cast(BaseExpr)) - ND = DRE->getDecl(); - if (const auto *ME = dyn_cast(BaseExpr)) - ND = ME->getMemberDecl(); - } - - if (ND) - DiagRuntimeBehavior(ND->getBeginLoc(), BaseExpr, - PDiag(diag::note_array_declared_here) << ND); - } - return; - } - if (index.isUnsigned() || !index.isNegative()) { // It is possible that the type of the base expression after // IgnoreParenCasts is incomplete, even though the type of the base @@ -14205,8 +14140,9 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, } } - unsigned DiagID = ASE ? diag::warn_array_index_exceeds_bounds - : diag::warn_ptr_arith_exceeds_bounds; + unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds; + if (ASE) + DiagID = diag::warn_array_index_exceeds_bounds; DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr, PDiag(DiagID) << index.toString(10, true) @@ -14227,11 +14163,12 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, if (!ND) { // Try harder to find a NamedDecl to point at in the note. - while (const auto *ASE = dyn_cast(BaseExpr)) + while (const ArraySubscriptExpr *ASE = + dyn_cast(BaseExpr)) BaseExpr = ASE->getBase()->IgnoreParenCasts(); - if (const auto *DRE = dyn_cast(BaseExpr)) + if (const DeclRefExpr *DRE = dyn_cast(BaseExpr)) ND = DRE->getDecl(); - if (const auto *ME = dyn_cast(BaseExpr)) + if (const MemberExpr *ME = dyn_cast(BaseExpr)) ND = ME->getMemberDecl(); } diff --git a/clang/test/Sema/const-eval.c b/clang/test/Sema/const-eval.c index c94539ab1de27..bbcbb0e25237e 100644 --- a/clang/test/Sema/const-eval.c +++ b/clang/test/Sema/const-eval.c @@ -140,10 +140,10 @@ EVAL_EXPR(52, &pr24622 == (void *)&PR24622); // expected-error {{must have a con // We evaluate these by providing 2s' complement semantics in constant // expressions, like we do for integers. -void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; // expected-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}} -void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; // expected-warning {{refers past the last possible element}} -__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}} -void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; // expected-warning {{refers past the last possible element}} +void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; +void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; +__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; +void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; struct PR35214_X { int k; diff --git a/clang/test/Sema/unbounded-array-bounds.c b/clang/test/Sema/unbounded-array-bounds.c deleted file mode 100644 index d47463ff94345..0000000000000 --- a/clang/test/Sema/unbounded-array-bounds.c +++ /dev/null @@ -1,80 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s \ -// RUN: --implicit-check-not 'past the last possible element' -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \ -// RUN: --implicit-check-not 'past the last possible element' -// RUN: %clang_cc1 -triple avr-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s \ -// RUN: --implicit-check-not 'past the last possible element' - -struct S { - long long a; - char b; - long long c; - short d; -}; - -struct S s[]; - -void f1() { - ++s[3].a; - ++s[7073650413200313099].b; - // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: array index 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) - ++s[7073650].c; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) -} - -long long ll[]; - -void f2() { - ++ll[3]; - ++ll[2705843009213693952]; - // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) - ++ll[847073650]; - // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements) - // CHECK-AVR-ADDR16: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements) -} - -void f3(struct S p[]) { - ++p[3].a; - ++p[7073650413200313099].b; - // CHECK-X86-ADDR64: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) - ++p[7073650].c; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) -} - -void f4(struct S *p) { - p += 3; - p += 7073650413200313099; - // CHECK-X86-ADDR64: :[[@LINE-1]]:3: warning: the pointer incremented by 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements) - // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements) - // CHECK-AVR-ADDR16: :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) - p += 7073650; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements) -} - -struct BQ { - struct S bigblock[3276]; -}; - -struct BQ bq[]; - -void f5() { - ++bq[0].bigblock[0].a; - ++bq[1].bigblock[0].a; - // CHECK-AVR-ADDR16: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element) -} - -void f6() { - int ints[] = {1, 3, 5, 7, 8, 6, 4, 5, 9}; - int const n_ints = sizeof(ints) / sizeof(int); - unsigned long long const N = 3; - - int *middle = &ints[0] + n_ints / 2; - // Should NOT produce a warning. - *(middle + 5 - N) = 22; -} diff --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp index 7fe71d4853508..8bc4f88a63a96 100644 --- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp @@ -1018,9 +1018,8 @@ constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant } constexpr void PR28739(int n) { // expected-error {{never produces a constant}} - int *p = &n; // expected-note {{declared here}} + int *p = &n; p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}} - // expected-warning@-1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}} } constexpr void Void(int n) { From 01a30fa6787d8375e1df573150f9927561b0a0f1 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 29 Sep 2020 21:51:14 +0100 Subject: [PATCH 084/544] [clangd] Trivial setter support when moving items to fields Extend the Trivial setter documentation to support cases where the value is moved into a field using `std::move`. Reviewed By: sammccall, kadircet Differential Revision: https://reviews.llvm.org/D88297 --- clang-tools-extra/clangd/Hover.cpp | 14 +++++++++++ .../clangd/unittests/HoverTests.cpp | 24 +++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index ef9bb536005fb..9da8a9c9af57f 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -413,6 +413,8 @@ llvm::Optional getterVariableName(const CXXMethodDecl *CMD) { // If CMD is one of the forms: // void foo(T arg) { FieldName = arg; } // R foo(T arg) { FieldName = arg; return *this; } +// void foo(T arg) { FieldName = std::move(arg); } +// R foo(T arg) { FieldName = std::move(arg); return *this; } // then returns "FieldName" llvm::Optional setterVariableName(const CXXMethodDecl *CMD) { assert(CMD->hasBody()); @@ -455,6 +457,18 @@ llvm::Optional setterVariableName(const CXXMethodDecl *CMD) { } else { return llvm::None; } + + // Detect the case when the item is moved into the field. + if (auto *CE = llvm::dyn_cast(RHS->IgnoreCasts())) { + if (CE->getNumArgs() != 1) + return llvm::None; + auto *ND = llvm::dyn_cast(CE->getCalleeDecl()); + if (!ND || !ND->getIdentifier() || ND->getName() != "move" || + !ND->isInStdNamespace()) + return llvm::None; + RHS = CE->getArg(0); + } + auto *DRE = llvm::dyn_cast(RHS->IgnoreCasts()); if (!DRE || DRE->getDecl() != Arg) return llvm::None; diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 636e5d99be522..48c0fef45ab85 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -698,6 +698,26 @@ class Foo {})cpp"; HI.Parameters->back().Name = "v"; HI.AccessSpecifier = "public"; }}, + {// Setter (move) + R"cpp( + namespace std { template T&& move(T&& t); } + struct X { int Y; void [[^setY]](float v) { Y = std::move(v); } }; + )cpp", + [](HoverInfo &HI) { + HI.Name = "setY"; + HI.Kind = index::SymbolKind::InstanceMethod; + HI.NamespaceScope = ""; + HI.Definition = "void setY(float v)"; + HI.LocalScope = "X::"; + HI.Documentation = "Trivial setter for `Y`."; + HI.Type = "void (float)"; + HI.ReturnType = "void"; + HI.Parameters.emplace(); + HI.Parameters->emplace_back(); + HI.Parameters->back().Type = "float"; + HI.Parameters->back().Name = "v"; + HI.AccessSpecifier = "public"; + }}, {// Field type initializer. R"cpp( struct X { int x = 2; }; @@ -802,8 +822,8 @@ class Foo {})cpp"; HI.Type = "int"; HI.AccessSpecifier = "public"; }}, - {// No crash on InitListExpr. - R"cpp( + {// No crash on InitListExpr. + R"cpp( struct Foo { int a[10]; }; From f0506e4923cdbd2b53258bc6c3a2b6bc62c8ccc3 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 22 Sep 2020 23:36:08 -0700 Subject: [PATCH 085/544] [MLIR] Avoid adding debuginfo for a function if it contains calls that has no debug info. Also add a verifier pass to ExecutionEngine. It's hard to come up with a test case, since mlir-opt always add location info after parsing it (?) Differential Revision: https://reviews.llvm.org/D88135 --- mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp | 7 ++++++- mlir/lib/Target/LLVMIR/DebugTranslation.cpp | 12 ++++++++++++ mlir/test/Target/llvmir-debug.mlir | 4 ---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp index baf9b5eba2c89..cadd172ace896 100644 --- a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/ToolOutputFile.h" using namespace mlir; @@ -24,7 +25,11 @@ using namespace mlir; std::unique_ptr mlir::translateModuleToLLVMIR(ModuleOp m, llvm::LLVMContext &llvmContext, StringRef name) { - return LLVM::ModuleTranslation::translateModule<>(m, llvmContext, name); + auto llvmModule = + LLVM::ModuleTranslation::translateModule<>(m, llvmContext, name); + if (verifyModule(*llvmModule)) + emitError(m.getLoc(), "LLVM IR fails to verify"); + return llvmModule; } namespace mlir { diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index af364ba9048fb..a0a19a2c02015 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -71,6 +71,18 @@ void DebugTranslation::translate(LLVMFuncOp func, llvm::Function &llvmFunc) { if (!compileUnit || !func.walk(interruptIfValidLocation).wasInterrupted()) return; + // If we are to create debug info for the function, we need to ensure that all + // inlinable calls in it are with debug info, otherwise the LLVM verifier will + // complain. For now, be more restricted and treat all calls as inlinable. + const bool hasCallWithoutDebugInfo = + func.walk([](LLVM::CallOp call) { + return call.getLoc().isa() ? WalkResult::interrupt() + : WalkResult::advance(); + }) + .wasInterrupted(); + if (hasCallWithoutDebugInfo) + return; + FileLineColLoc fileLoc = extractFileLoc(func.getLoc()); auto *file = translateFile(fileLoc ? fileLoc.getFilename() : ""); unsigned line = fileLoc ? fileLoc.getLine() : 0; diff --git a/mlir/test/Target/llvmir-debug.mlir b/mlir/test/Target/llvmir-debug.mlir index 2a9444839352f..590fb8b2180c7 100644 --- a/mlir/test/Target/llvmir-debug.mlir +++ b/mlir/test/Target/llvmir-debug.mlir @@ -9,10 +9,6 @@ llvm.func @func_no_debug() { // CHECK-LABEL: define void @func_with_debug() // CHECK-SAME: !dbg ![[FUNC_LOC:[0-9]+]] llvm.func @func_with_debug() { - // CHECK: call void @func_no_debug() - // CHECK-NOT: !dbg - llvm.call @func_no_debug() : () -> () loc(unknown) - // CHECK: call void @func_no_debug(), !dbg ![[CALLSITE_LOC:[0-9]+]] llvm.call @func_no_debug() : () -> () loc(callsite("mysource.cc":3:4 at "mysource.cc":5:6)) From 6b70a83d9cc0ec17aa4bc199081c0a51e65be6dd Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Tue, 29 Sep 2020 16:17:47 -0400 Subject: [PATCH 086/544] [ms] [llvm-ml] Add support for .radix directive, and accept all radix specifiers Add support for .radix directive, and radix specifiers [yY] (binary), [oOqQ] (octal), and [tT] (decimal). Also, when lexing MASM integers, require radix specifier; MASM requires that all literals without a radix specifier be treated as in the default radix. (e.g., 0100 = 100) Relanding D87400, now with fewer ms-inline-asm tests broken! Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D88337 --- llvm/include/llvm/MC/MCParser/MCAsmLexer.h | 13 +- llvm/lib/MC/MCParser/AsmLexer.cpp | 125 ++++++++++++++---- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 1 - llvm/lib/MC/MCParser/MasmParser.cpp | 23 ++++ .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 28 ++-- llvm/test/tools/llvm-ml/radix.test | 97 ++++++++++++++ llvm/test/tools/llvm-ml/radix_errors.test | 55 ++++++++ llvm/tools/llvm-ml/llvm-ml.cpp | 2 + 8 files changed, 308 insertions(+), 36 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/radix.test create mode 100644 llvm/test/tools/llvm-ml/radix_errors.test diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h index e89abeaac94c2..1e449a7f59d21 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -50,6 +50,8 @@ class MCAsmLexer { bool AllowAtInIdentifier; bool IsAtStartOfStatement = true; bool LexMasmIntegers = false; + bool UseMasmDefaultRadix = false; + unsigned DefaultRadix = 10; AsmCommentConsumer *CommentConsumer = nullptr; MCAsmLexer(); @@ -147,9 +149,16 @@ class MCAsmLexer { this->CommentConsumer = CommentConsumer; } - /// Set whether to lex masm-style binary and hex literals. They look like - /// 0b1101 and 0ABCh respectively. + /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified + /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]). void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } + + /// Set whether to use masm-style default-radix integer literals. If disabled, + /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]). + void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; } + + unsigned getMasmDefaultRadix() const { return DefaultRadix; } + void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; } }; } // end namespace llvm diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index 5a571c7c0c0ee..12a71d69ed79c 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SaveAndRestore.h" #include @@ -271,13 +272,34 @@ static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, return DefaultRadix; } -static AsmToken intToken(StringRef Ref, APInt &Value) -{ +static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) { + while (hexDigitValue(*CurPtr) < DefaultRadix) { + ++CurPtr; + } + return CurPtr; +} + +static AsmToken intToken(StringRef Ref, APInt &Value) { if (Value.isIntN(64)) return AsmToken(AsmToken::Integer, Ref, Value); return AsmToken(AsmToken::BigNum, Ref, Value); } +static std::string radixName(unsigned Radix) { + switch (Radix) { + case 2: + return "binary"; + case 8: + return "octal"; + case 10: + return "decimal"; + case 16: + return "hexadecimal"; + default: + return "base-" + std::to_string(Radix); + } +} + /// LexDigit: First character is [0-9]. /// Local Label: [0-9][:] /// Forward/Backward Label: [0-9][fb] @@ -286,16 +308,46 @@ static AsmToken intToken(StringRef Ref, APInt &Value) /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { - // MASM-flavor binary integer: [01]+[bB] + // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY]) + // MASM-flavor octal integer: [0-7]+[oOqQ] + // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT]) // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] if (LexMasmIntegers && isdigit(CurPtr[-1])) { - const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? - CurPtr - 1 : nullptr; + const char *FirstNonBinary = + (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; + const char *FirstNonDecimal = + (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr; const char *OldCurPtr = CurPtr; while (isHexDigit(*CurPtr)) { - if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) - FirstNonBinary = CurPtr; + switch (*CurPtr) { + default: + if (!FirstNonDecimal) { + FirstNonDecimal = CurPtr; + } + LLVM_FALLTHROUGH; + case '9': + case '8': + case '7': + case '6': + case '5': + case '4': + case '3': + case '2': + if (!FirstNonBinary) { + FirstNonBinary = CurPtr; + } + break; + case '1': + case '0': + break; + } + ++CurPtr; + } + if (*CurPtr == '.') { + // MASM float literals (other than hex floats) always contain a ".", and + // are always written in decimal. ++CurPtr; + return LexFloatLiteral(); } unsigned Radix = 0; @@ -303,28 +355,61 @@ AsmToken AsmLexer::LexDigit() { // hexadecimal number ++CurPtr; Radix = 16; + } else if (*CurPtr == 't' || *CurPtr == 'T') { + // decimal number + ++CurPtr; + Radix = 10; + } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' || + *CurPtr == 'Q') { + // octal number + ++CurPtr; + Radix = 8; + } else if (*CurPtr == 'y' || *CurPtr == 'Y') { + // binary number + ++CurPtr; + Radix = 2; + } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr && + DefaultRadix < 14 && + (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) { + Radix = 10; } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && - (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) + DefaultRadix < 12 && + (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) { Radix = 2; + } - if (Radix == 2 || Radix == 16) { + if (Radix) { StringRef Result(TokStart, CurPtr - TokStart); APInt Value(128, 0, true); if (Result.drop_back().getAsInteger(Radix, Value)) - return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : - "invalid hexdecimal number"); + return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); // MSVC accepts and ignores type suffices on integer literals. SkipIgnoredIntegerSuffix(CurPtr); return intToken(Result, Value); - } + } - // octal/decimal integers, or floating point numbers, fall through + // default-radix integers, or floating point numbers, fall through CurPtr = OldCurPtr; } + // MASM default-radix integers: [0-9a-fA-F]+ + // (All other integer literals have a radix specifier.) + if (LexMasmIntegers && UseMasmDefaultRadix) { + CurPtr = findLastDigit(CurPtr, 16); + StringRef Result(TokStart, CurPtr - TokStart); + + APInt Value(128, 0, true); + if (Result.getAsInteger(DefaultRadix, Value)) { + return ReturnError(TokStart, + "invalid " + radixName(DefaultRadix) + " number"); + } + + return intToken(Result, Value); + } + // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); @@ -339,13 +424,9 @@ AsmToken AsmLexer::LexDigit() { StringRef Result(TokStart, CurPtr - TokStart); APInt Value(128, 0, true); - if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, !isHex ? "invalid decimal number" : - "invalid hexdecimal number"); - - // Consume the [hH]. - if (LexMasmIntegers && Radix == 16) - ++CurPtr; + if (Result.getAsInteger(Radix, Value)) { + return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); + } // The darwin/x86 (and x86-64) assembler accepts and ignores type // suffices on integer literals. @@ -416,11 +497,9 @@ AsmToken AsmLexer::LexDigit() { // Either octal or hexadecimal. APInt Value(128, 0, true); unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers); - bool isHex = Radix == 16; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, !isHex ? "invalid octal number" : - "invalid hexdecimal number"); + return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); // Consume the [hH]. if (Radix == 16) diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 532ded038043f..575e6ee265c8e 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -132,7 +132,6 @@ class COFFMasmParser : public MCAsmParserExtension { // option // popcontext // pushcontext - // .radix // .safeseh // Procedure directives diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index ca9b2df7cf231..352d9473a2378 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -732,6 +732,7 @@ class MasmParser : public MCAsmParser { DK_SAVEREG, DK_SAVEXMM128, DK_SETFRAME, + DK_RADIX, }; /// Maps directive name --> DirectiveKind enum, for directives parsed by this @@ -964,6 +965,9 @@ class MasmParser : public MCAsmParser { // ".erre" or ".errnz", depending on ExpectZero. bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero); + // ".radix" + bool parseDirectiveRadix(SMLoc DirectiveLoc); + // "echo" bool parseDirectiveEcho(); @@ -2284,6 +2288,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveErrorIfe(IDLoc, true); case DK_ERRNZ: return parseDirectiveErrorIfe(IDLoc, false); + case DK_RADIX: + return parseDirectiveRadix(IDLoc); case DK_ECHO: return parseDirectiveEcho(); } @@ -6343,6 +6349,7 @@ void MasmParser::initializeDirectiveKindMap() { DirectiveKindMap[".savereg"] = DK_SAVEREG; DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128; DirectiveKindMap[".setframe"] = DK_SETFRAME; + DirectiveKindMap[".radix"] = DK_RADIX; // DirectiveKindMap[".altmacro"] = DK_ALTMACRO; // DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO; DirectiveKindMap["db"] = DK_DB; @@ -6584,6 +6591,22 @@ bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { return false; } +bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) { + const SMLoc Loc = getLexer().getLoc(); + StringRef RadixString = parseStringToEndOfStatement().trim(); + unsigned Radix; + if (RadixString.getAsInteger(10, Radix)) { + return Error(Loc, + "radix must be a decimal number in the range 2 to 16; was " + + RadixString); + } + if (Radix < 2 || Radix > 16) + return Error(Loc, "radix must be in the range 2 to 16; was " + + std::to_string(Radix)); + getLexer().setMasmDefaultRadix(Radix); + return false; +} + bool MasmParser::parseDirectiveEcho() { StringRef Message = parseStringToEndOfStatement(); Lex(); // eat end of statement diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 2b48d4bd8abe7..7a7c81000a2c1 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1662,6 +1662,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if ((Done = SM.isValidEndState())) break; return Error(Tok.getLoc(), "unknown token in expression"); + case AsmToken::Error: + return Error(getLexer().getErrLoc(), getLexer().getErr()); + break; case AsmToken::EndOfStatement: Done = true; break; @@ -2453,21 +2456,26 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) { // Parse memory broadcasting ({1to}). if (getLexer().getTok().getIntVal() != 1) return TokError("Expected 1to at this point"); - Parser.Lex(); // Eat "1" of 1to8 - if (!getLexer().is(AsmToken::Identifier) || - !getLexer().getTok().getIdentifier().startswith("to")) + StringRef Prefix = getLexer().getTok().getString(); + Parser.Lex(); // Eat first token of 1to8 + if (!getLexer().is(AsmToken::Identifier)) return TokError("Expected 1to at this point"); // Recognize only reasonable suffixes. + SmallVector BroadcastVector; + StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier()) + .toStringRef(BroadcastVector); + if (!BroadcastString.startswith("1to")) + return TokError("Expected 1to at this point"); const char *BroadcastPrimitive = - StringSwitch(getLexer().getTok().getIdentifier()) - .Case("to2", "{1to2}") - .Case("to4", "{1to4}") - .Case("to8", "{1to8}") - .Case("to16", "{1to16}") - .Default(nullptr); + StringSwitch(BroadcastString) + .Case("1to2", "{1to2}") + .Case("1to4", "{1to4}") + .Case("1to8", "{1to8}") + .Case("1to16", "{1to16}") + .Default(nullptr); if (!BroadcastPrimitive) return TokError("Invalid memory broadcast primitive."); - Parser.Lex(); // Eat "toN" of 1toN + Parser.Lex(); // Eat trailing token of 1toN if (!getLexer().is(AsmToken::RCurly)) return TokError("Expected } at this point"); Parser.Lex(); // Eat "}" diff --git a/llvm/test/tools/llvm-ml/radix.test b/llvm/test/tools/llvm-ml/radix.test new file mode 100644 index 0000000000000..64333706b07d2 --- /dev/null +++ b/llvm/test/tools/llvm-ml/radix.test @@ -0,0 +1,97 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.code + +t1: +mov eax, 100b +mov eax, 100y + +; CHECK-LABEL: t1: +; CHECK-NEXT: mov eax, 4 +; CHECK-NEXT: mov eax, 4 + +t2: +mov eax, 100o +mov eax, 100q + +; CHECK-LABEL: t2: +; CHECK-NEXT: mov eax, 64 +; CHECK-NEXT: mov eax, 64 + +t3: +mov eax, 100d +mov eax, 100t + +; CHECK-LABEL: t3: +; CHECK-NEXT: mov eax, 100 +; CHECK-NEXT: mov eax, 100 + +t4: +mov eax, 100h + +; CHECK-LABEL: t4: +; CHECK-NEXT: mov eax, 256 + +t5: +mov eax, 100 +.radix 2 +mov eax, 100 +.radix 16 +mov eax, 100 +.radix 10 +mov eax, 100 + +; CHECK-LABEL: t5: +; CHECK: mov eax, 100 +; CHECK: mov eax, 4 +; CHECK: mov eax, 256 +; CHECK: mov eax, 100 + +t6: +.radix 9 +mov eax, 100 +.radix 10 + +; CHECK-LABEL: t6: +; CHECK: mov eax, 81 + +t7: +.radix 12 +mov eax, 100b +mov eax, 100y +.radix 10 + +; CHECK-LABEL: t7: +; CHECK: mov eax, 1739 +; CHECK: mov eax, 4 + +t8: +.radix 16 +mov eax, 100d +mov eax, 100t +.radix 10 + +; CHECK-LABEL: t8: +; CHECK: mov eax, 4109 +; CHECK: mov eax, 100 + +t9: +.radix 12 +mov eax, 102b +.radix 16 +mov eax, 10fd +.radix 10 + +; CHECK-LABEL: t9: +; CHECK: mov eax, 1763 +; CHECK: mov eax, 4349 + +t10: +.radix 16 +mov eax, 1e1 +.radix 10 + +; CHECK-LABEL: t10: +; CHECK: mov eax, 481 + +END diff --git a/llvm/test/tools/llvm-ml/radix_errors.test b/llvm/test/tools/llvm-ml/radix_errors.test new file mode 100644 index 0000000000000..4745e7911900a --- /dev/null +++ b/llvm/test/tools/llvm-ml/radix_errors.test @@ -0,0 +1,55 @@ +; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error: + +.code + +t1: +; CHECK: :[[# @LINE + 1]]:10: error: invalid decimal number +mov eax, 120b +; CHECK: :[[# @LINE + 1]]:10: error: invalid binary number +mov eax, 120y +.radix 11 +; CHECK: :[[# @LINE + 1]]:10: error: invalid base-11 number +mov eax, 120b +; CHECK: :[[# @LINE + 1]]:10: error: invalid binary number +mov eax, 120y +.radix 10 + +t2: +; CHECK: :[[# @LINE + 1]]:10: error: invalid octal number +mov eax, 190o +; CHECK: :[[# @LINE + 1]]:10: error: invalid octal number +mov eax, 190q +.radix 13 +; CHECK: :[[# @LINE + 1]]:10: error: invalid octal number +mov eax, 190o +; CHECK: :[[# @LINE + 1]]:10: error: invalid octal number +mov eax, 190q +.radix 10 + +t3: +; CHECK: :[[# @LINE + 1]]:10: error: invalid decimal number +mov eax, 1f0d +; CHECK: :[[# @LINE + 1]]:10: error: invalid decimal number +mov eax, 1f0t +.radix 13 +; CHECK: :[[# @LINE + 1]]:10: error: invalid base-13 number +mov eax, 1f0d +; CHECK: :[[# @LINE + 1]]:10: error: invalid decimal number +mov eax, 1f0t +.radix 10 + +t4: +; CHECK: :[[# @LINE + 1]]:10: error: invalid decimal number +mov eax, 10e +.radix 16 +.radix 10 +; CHECK: :[[# @LINE + 1]]:10: error: invalid decimal number +mov eax, 10e + +t5: +.radix 9 +; CHECK: :[[# @LINE + 1]]:10: error: invalid base-9 number +mov eax, 9 +.radix 10 + +END diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 5abf22d6d6ddf..460a566f3219c 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -176,6 +176,7 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, raw_ostream &OS) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer()); Lexer.setLexMasmIntegers(true); + Lexer.useMasmDefaultRadix(true); bool Error = false; while (Lexer.Lex().isNot(AsmToken::Eof)) { @@ -206,6 +207,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, Parser->setShowParsedOperands(ShowInstOperands); Parser->setTargetParser(*TAP); Parser->getLexer().setLexMasmIntegers(true); + Parser->getLexer().useMasmDefaultRadix(true); int Res = Parser->Run(/*NoInitialTextSection=*/true); From c65e9e71eb7612b297a8bfd3e74759d55cfc7bf4 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Tue, 29 Sep 2020 16:57:25 -0400 Subject: [PATCH 087/544] [ms] [llvm-ml] Add MASM hex float support Implement MASM's syntax for specifying floats in raw hexadecimal bytes. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D87401 --- llvm/include/llvm/MC/MCParser/MCAsmLexer.h | 4 ++++ llvm/lib/MC/MCParser/AsmLexer.cpp | 5 +++++ llvm/lib/MC/MCParser/MasmParser.cpp | 17 +++++++++++++++++ llvm/test/tools/llvm-ml/builtin_types.test | 8 ++++++++ llvm/tools/llvm-mc/llvm-mc.cpp | 5 +++++ llvm/tools/llvm-ml/llvm-ml.cpp | 2 ++ 6 files changed, 41 insertions(+) diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h index 1e449a7f59d21..e2f3301d2f2b1 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -49,6 +49,7 @@ class MCAsmLexer { bool SkipSpace = true; bool AllowAtInIdentifier; bool IsAtStartOfStatement = true; + bool LexMasmHexFloats = false; bool LexMasmIntegers = false; bool UseMasmDefaultRadix = false; unsigned DefaultRadix = 10; @@ -159,6 +160,9 @@ class MCAsmLexer { unsigned getMasmDefaultRadix() const { return DefaultRadix; } void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; } + + /// Set whether to lex masm-style hex float literals, such as 3f800000r. + void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; } }; } // end namespace llvm diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index 12a71d69ed79c..d8a20341bfb9e 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -350,6 +350,11 @@ AsmToken AsmLexer::LexDigit() { return LexFloatLiteral(); } + if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) { + ++CurPtr; + return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); + } + unsigned Radix = 0; if (*CurPtr == 'h' || *CurPtr == 'H') { // hexadecimal number diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 352d9473a2378..d0a52657d6621 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -3425,10 +3425,13 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { // We don't truly support arithmetic on floating point expressions, so we // have to manually parse unary prefixes. bool IsNeg = false; + SMLoc SignLoc; if (getLexer().is(AsmToken::Minus)) { + SignLoc = getLexer().getLoc(); Lexer.Lex(); IsNeg = true; } else if (getLexer().is(AsmToken::Plus)) { + SignLoc = getLexer().getLoc(); Lexer.Lex(); } @@ -3450,6 +3453,20 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { Value = APFloat::getZero(Semantics); else return TokError("invalid floating point literal"); + } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) { + // MASM hexadecimal floating-point literal; no APFloat conversion needed. + // To match ML64.exe, ignore the initial sign. + unsigned Size = Value.getSizeInBits(Semantics); + if (Size != (IDVal.size() << 2)) + return TokError("invalid floating point literal"); + + // Consume the numeric token. + Lex(); + + Res = APInt(Size, IDVal, 16); + if (SignLoc.isValid()) + return Warning(SignLoc, "MASM-style hex floats ignore explicit sign"); + return false; } else if (errorToBool( Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) .takeError())) { diff --git a/llvm/test/tools/llvm-ml/builtin_types.test b/llvm/test/tools/llvm-ml/builtin_types.test index b99c491cb8dd8..f04e318b1b02f 100644 --- a/llvm/test/tools/llvm-ml/builtin_types.test +++ b/llvm/test/tools/llvm-ml/builtin_types.test @@ -72,6 +72,14 @@ t6_double REAL8 1.3 ; CHECK-LABEL: t6_double: ; CHECK-NEXT: .quad 4608533498688228557 +t7_single_hex REAL4 3f800000r +t7_double_hex REAL8 3FF0000000000000R + +; CHECK-LABEL: t7_single_hex: +; CHECK-NEXT: .long 1065353216 +; CHECK-LABEL: t7_double_hex: +; CHECK-NEXT: .quad 4607182418800017408 + .code END diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index 66b55abc48983..f8352f36ad756 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -169,6 +169,10 @@ static cl::opt LexMasmIntegers( "masm-integers", cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)")); +static cl::opt LexMasmHexFloats( + "masm-hexfloats", + cl::desc("Enable MASM-style hex float initializers (3F800000r)")); + static cl::opt NoExecStack("no-exec-stack", cl::desc("File doesn't need an exec stack")); @@ -300,6 +304,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, Parser->setShowParsedOperands(ShowInstOperands); Parser->setTargetParser(*TAP); Parser->getLexer().setLexMasmIntegers(LexMasmIntegers); + Parser->getLexer().setLexMasmHexFloats(LexMasmHexFloats); int Res = Parser->Run(NoInitialTextSection); diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 460a566f3219c..3a3984286e4af 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -177,6 +177,7 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, raw_ostream &OS) { Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer()); Lexer.setLexMasmIntegers(true); Lexer.useMasmDefaultRadix(true); + Lexer.setLexMasmHexFloats(true); bool Error = false; while (Lexer.Lex().isNot(AsmToken::Eof)) { @@ -208,6 +209,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, Parser->setTargetParser(*TAP); Parser->getLexer().setLexMasmIntegers(true); Parser->getLexer().useMasmDefaultRadix(true); + Parser->getLexer().setLexMasmHexFloats(true); int Res = Parser->Run(/*NoInitialTextSection=*/true); From fdd23a35422ca133410c6b066ea191f426267c46 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Tue, 29 Sep 2020 16:58:39 -0400 Subject: [PATCH 088/544] [ms] [llvm-ml] Add REAL10 support (x87 extended precision) Add MASM support for 80-bit reals in the x87 extended precision format. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D87402 --- llvm/include/llvm/MC/MCStreamer.h | 2 + llvm/lib/MC/MCParser/MasmParser.cpp | 46 +++++++++++++++------ llvm/lib/MC/MCStreamer.cpp | 15 +++++++ llvm/test/tools/llvm-ml/builtin_types.test | 24 +++++++---- llvm/test/tools/llvm-ml/type_operators.test | 8 ++++ 5 files changed, 74 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 63a4c1d190aca..8faa3b0c8efbf 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -13,6 +13,7 @@ #ifndef LLVM_MC_MCSTREAMER_H #define LLVM_MC_MCSTREAMER_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -673,6 +674,7 @@ class MCStreamer { /// Special case of EmitValue that avoids the client having /// to pass in a MCExpr for constant integers. virtual void emitIntValue(uint64_t Value, unsigned Size); + virtual void emitIntValue(APInt Value); /// Special case of EmitValue that avoids the client having to pass /// in a MCExpr for constant integers & prints in Hex format for certain diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index d0a52657d6621..0d5d6a112902b 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -634,6 +634,7 @@ class MasmParser : public MCAsmParser { DK_DW, DK_REAL4, DK_REAL8, + DK_REAL10, DK_ALIGN, DK_ORG, DK_ENDR, @@ -771,7 +772,7 @@ class MasmParser : public MCAsmParser { bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size, StringRef Name, SMLoc NameLoc); - // "real4", "real8" + // "real4", "real8", "real10" bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr); bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, @@ -2147,6 +2148,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4); case DK_REAL8: return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8); + case DK_REAL10: + return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10); case DK_STRUCT: case DK_UNION: return parseDirectiveNestedStruct(IDVal, DirKind); @@ -2382,6 +2385,10 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, Lex(); return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8, IDVal, IDLoc); + case DK_REAL10: + Lex(); + return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(), + 10, IDVal, IDLoc); case DK_STRUCT: case DK_UNION: Lex(); @@ -3456,14 +3463,14 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) { // MASM hexadecimal floating-point literal; no APFloat conversion needed. // To match ML64.exe, ignore the initial sign. - unsigned Size = Value.getSizeInBits(Semantics); - if (Size != (IDVal.size() << 2)) + unsigned SizeInBits = Value.getSizeInBits(Semantics); + if (SizeInBits != (IDVal.size() << 2)) return TokError("invalid floating point literal"); // Consume the numeric token. Lex(); - Res = APInt(Size, IDVal, 16); + Res = APInt(SizeInBits, IDVal, 16); if (SignLoc.isValid()) return Warning(SignLoc, "MASM-style hex floats ignore explicit sign"); return false; @@ -3540,8 +3547,7 @@ bool MasmParser::emitRealValues(const fltSemantics &Semantics, return true; for (const APInt &AsInt : ValuesAsInt) { - getStreamer().emitIntValue(AsInt.getLimitedValue(), - AsInt.getBitWidth() / 8); + getStreamer().emitIntValue(AsInt); } if (Count) *Count = ValuesAsInt.size(); @@ -3571,7 +3577,7 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, } /// parseDirectiveRealValue -/// ::= (real4 | real8) [ expression (, expression)* ] +/// ::= (real4 | real8 | real10) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, size_t Size) { @@ -3586,7 +3592,7 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, } /// parseDirectiveNamedRealValue -/// ::= name (real4 | real8) [ expression (, expression)* ] +/// ::= name (real4 | real8 | real10) [ expression (, expression)* ] bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName, const fltSemantics &Semantics, unsigned Size, StringRef Name, @@ -3680,8 +3686,20 @@ bool MasmParser::parseFieldInitializer(const FieldInfo &Field, bool MasmParser::parseFieldInitializer(const FieldInfo &Field, const RealFieldInfo &Contents, FieldInitializer &Initializer) { - const fltSemantics &Semantics = - (Field.Type == 4) ? APFloat::IEEEsingle() : APFloat::IEEEdouble(); + const fltSemantics *Semantics; + switch (Field.Type) { + case 4: + Semantics = &APFloat::IEEEsingle(); + break; + case 8: + Semantics = &APFloat::IEEEdouble(); + break; + case 10: + Semantics = &APFloat::x87DoubleExtended(); + break; + default: + llvm_unreachable("unknown real field type"); + } SMLoc Loc = getTok().getLoc(); @@ -3689,20 +3707,20 @@ bool MasmParser::parseFieldInitializer(const FieldInfo &Field, if (parseOptionalToken(AsmToken::LCurly)) { if (Field.LengthOf == 1) return Error(Loc, "Cannot initialize scalar field with array value"); - if (parseRealInstList(Semantics, AsIntValues, AsmToken::RCurly) || + if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) || parseToken(AsmToken::RCurly)) return true; } else if (parseOptionalAngleBracketOpen()) { if (Field.LengthOf == 1) return Error(Loc, "Cannot initialize scalar field with array value"); - if (parseRealInstList(Semantics, AsIntValues, AsmToken::Greater) || + if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) || parseAngleBracketClose()) return true; } else if (Field.LengthOf > 1) { return Error(Loc, "Cannot initialize array field with scalar value"); } else { AsIntValues.emplace_back(); - if (parseRealValue(Semantics, AsIntValues.back())) + if (parseRealValue(*Semantics, AsIntValues.back())) return true; } @@ -6278,6 +6296,7 @@ void MasmParser::initializeDirectiveKindMap() { DirectiveKindMap["sqword"] = DK_SQWORD; DirectiveKindMap["real4"] = DK_REAL4; DirectiveKindMap["real8"] = DK_REAL8; + DirectiveKindMap["real10"] = DK_REAL10; DirectiveKindMap["align"] = DK_ALIGN; // DirectiveKindMap[".org"] = DK_ORG; DirectiveKindMap["extern"] = DK_EXTERN; @@ -6732,6 +6751,7 @@ bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const { .CasesLower("qword", "dq", "sqword", 8) .CaseLower("real4", 4) .CaseLower("real8", 8) + .CaseLower("real10", 10) .Default(0); if (Size) { Info.Name = Name; diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 995828b577380..46aa0b89842ca 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -138,6 +138,21 @@ void MCStreamer::emitIntValue(uint64_t Value, unsigned Size) { unsigned Index = IsLittleEndian ? 0 : 8 - Size; emitBytes(StringRef(reinterpret_cast(&Swapped) + Index, Size)); } +void MCStreamer::emitIntValue(APInt Value) { + if (Value.getNumWords() == 1) { + emitIntValue(Value.getLimitedValue(), Value.getBitWidth() / 8); + return; + } + + const bool IsLittleEndianTarget = Context.getAsmInfo()->isLittleEndian(); + const bool ShouldSwap = sys::IsLittleEndianHost != IsLittleEndianTarget; + const APInt Swapped = ShouldSwap ? Value.byteSwap() : Value; + const unsigned Size = Value.getBitWidth() / 8; + SmallString<10> Tmp; + Tmp.resize(Size); + StoreIntToMemory(Swapped, reinterpret_cast(Tmp.data()), Size); + emitBytes(Tmp.str()); +} /// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. diff --git a/llvm/test/tools/llvm-ml/builtin_types.test b/llvm/test/tools/llvm-ml/builtin_types.test index f04e318b1b02f..89ec1cfd0d711 100644 --- a/llvm/test/tools/llvm-ml/builtin_types.test +++ b/llvm/test/tools/llvm-ml/builtin_types.test @@ -65,20 +65,28 @@ t5_signed SQWORD -4611686018427387904 ; CHECK-NEXT: .quad -4611686018427387904 t6_single REAL4 1.3 -t6_double REAL8 1.3 +t6_single_hex REAL4 3fa66666r ; CHECK-LABEL: t6_single: ; CHECK-NEXT: .long 1067869798 -; CHECK-LABEL: t6_double: -; CHECK-NEXT: .quad 4608533498688228557 +; CHECK-LABEL: t6_single_hex: +; CHECK-NEXT: .long 1067869798 -t7_single_hex REAL4 3f800000r -t7_double_hex REAL8 3FF0000000000000R +t7_double REAL8 1.3 +t7_double_hex REAL8 3FF4CCCCCCCCCCCDR -; CHECK-LABEL: t7_single_hex: -; CHECK-NEXT: .long 1065353216 +; CHECK-LABEL: t7_double: +; CHECK-NEXT: .quad 4608533498688228557 ; CHECK-LABEL: t7_double_hex: -; CHECK-NEXT: .quad 4607182418800017408 +; CHECK-NEXT: .quad 4608533498688228557 + +t8_extended REAL10 1.3 +t8_extended_hex REAL10 3FFFA666666666666666r + +; CHECK-LABEL: t8_extended: +; CHECK-NEXT: .ascii "fffffff\246\377?" +; CHECK-LABEL: t8_extended_hex: +; CHECK-NEXT: .ascii "fffffff\246\377?" .code diff --git a/llvm/test/tools/llvm-ml/type_operators.test b/llvm/test/tools/llvm-ml/type_operators.test index b8546927e3efb..7de6cdd9448ee 100644 --- a/llvm/test/tools/llvm-ml/type_operators.test +++ b/llvm/test/tools/llvm-ml/type_operators.test @@ -196,6 +196,7 @@ mov eax, type(t6_signed) t7_single REAL4 2 DUP (?) t7_double REAL8 ? +t7_extended REAL10 3 DUP (?) t7: ; CHECK-LABEL: t7: @@ -214,6 +215,13 @@ mov eax, type(t7_double) ; CHECK: mov eax, 1 ; CHECK: mov eax, 8 +mov eax, sizeof(t7_extended) +mov eax, lengthof(t7_extended) +mov eax, type(t7_extended) +; CHECK: mov eax, 30 +; CHECK: mov eax, 3 +; CHECK: mov eax, 10 + t8_var FOO <>, <> From 0548d1ca24b72d28e50fbd8a456b1fd36beacb07 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Tue, 29 Sep 2020 16:59:42 -0400 Subject: [PATCH 089/544] [ms] [llvm-ml] Add support for "alias" directive Support the "alias" directive. Required support for emitWeakReference in MCWinCOFFStreamer. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D87403 --- llvm/include/llvm/MC/MCWinCOFFStreamer.h | 1 + llvm/lib/MC/MCParser/COFFMasmParser.cpp | 34 +++++++-- llvm/lib/MC/MCWinCOFFStreamer.cpp | 10 +++ llvm/test/tools/llvm-ml/alias.test | 92 +++++++++++++++++++++++ llvm/test/tools/llvm-ml/alias_errors.test | 36 +++++++++ llvm/test/tools/llvm-ml/proc.test | 5 -- llvm/test/tools/llvm-ml/proc_frame.test | 5 -- 7 files changed, 166 insertions(+), 17 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/alias.test create mode 100644 llvm/test/tools/llvm-ml/alias_errors.test diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h index 1236304b9e5da..53b2ef0bd96ea 100644 --- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h @@ -58,6 +58,7 @@ class MCWinCOFFStreamer : public MCObjectStreamer { unsigned ByteAlignment) override; void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; + void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; void emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, SMLoc Loc = SMLoc()) override; void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 575e6ee265c8e..94146683e8a14 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/SMLoc.h" #include @@ -53,6 +54,8 @@ class COFFMasmParser : public MCAsmParserExtension { bool ParseDirectiveSegmentEnd(StringRef, SMLoc); bool ParseDirectiveIncludelib(StringRef, SMLoc); + bool ParseDirectiveAlias(StringRef, SMLoc); + bool ParseSEHDirectiveAllocStack(StringRef, SMLoc); bool ParseSEHDirectiveEndProlog(StringRef, SMLoc); @@ -124,7 +127,7 @@ class COFFMasmParser : public MCAsmParserExtension { // purge // Miscellaneous directives - // alias + addDirectiveHandler<&COFFMasmParser::ParseDirectiveAlias>("alias"); // assume // .fpo addDirectiveHandler<&COFFMasmParser::ParseDirectiveIncludelib>( @@ -343,13 +346,11 @@ bool COFFMasmParser::ParseDirectiveProc(StringRef Directive, SMLoc Loc) { nextLoc = getTok().getLoc(); } } - MCSymbol *Sym = getContext().getOrCreateSymbol(Label); + MCSymbolCOFF *Sym = cast(getContext().getOrCreateSymbol(Label)); - // Define symbol as simple function - getStreamer().BeginCOFFSymbolDef(Sym); - getStreamer().EmitCOFFSymbolStorageClass(2); - getStreamer().EmitCOFFSymbolType(0x20); - getStreamer().EndCOFFSymbolDef(); + // Define symbol as simple external function + Sym->setExternal(true); + Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); bool Framed = false; if (getLexer().is(AsmToken::Identifier) && @@ -384,6 +385,25 @@ bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) { return false; } +bool COFFMasmParser::ParseDirectiveAlias(StringRef Directive, SMLoc Loc) { + std::string AliasName, ActualName; + if (getTok().isNot(AsmToken::Less) || + getParser().parseAngleBracketString(AliasName)) + return Error(getTok().getLoc(), "expected "); + if (getParser().parseToken(AsmToken::Equal)) + return addErrorSuffix(" in " + Directive + " directive"); + if (getTok().isNot(AsmToken::Less) || + getParser().parseAngleBracketString(ActualName)) + return Error(getTok().getLoc(), "expected "); + + MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); + MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName); + + getStreamer().emitWeakReference(Alias, Actual); + + return false; +} + bool COFFMasmParser::ParseSEHDirectiveAllocStack(StringRef Directive, SMLoc Loc) { int64_t Size; diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index 520d4a0246915..97cceac74ac2d 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -308,6 +308,16 @@ void MCWinCOFFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size, PopSection(); } +void MCWinCOFFStreamer::emitWeakReference(MCSymbol *AliasS, + const MCSymbol *Symbol) { + auto *Alias = cast(AliasS); + emitSymbolAttribute(Alias, MCSA_Weak); + + getAssembler().registerSymbol(*Symbol); + Alias->setVariableValue(MCSymbolRefExpr::create( + Symbol, MCSymbolRefExpr::VK_WEAKREF, getContext())); +} + void MCWinCOFFStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, SMLoc Loc) { diff --git a/llvm/test/tools/llvm-ml/alias.test b/llvm/test/tools/llvm-ml/alias.test new file mode 100644 index 0000000000000..2daaecdbbcc05 --- /dev/null +++ b/llvm/test/tools/llvm-ml/alias.test @@ -0,0 +1,92 @@ +; RUN: llvm-ml -filetype=obj %s | llvm-readobj --syms - | FileCheck %s + +.code + +proc1 PROC + ret +proc1 ENDP + +proc2 PROC + ret +proc2 ENDP + +alias = +; CHECK: Symbol { +; CHECK: Name: t1 +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED (0) +; CHECK-NEXT: BaseType: Null +; CHECK-NEXT: ComplexType: Null +; CHECK-NEXT: StorageClass: WeakExternal +; CHECK-NEXT: AuxSymbolCount: 1 +; CHECK-NEXT: AuxWeakExternal { +; CHECK-NEXT: Linked: proc1 +; CHECK-NEXT: Search: Alias +; CHECK-NEXT: } +; CHECK-NEXT: } + +alias = +; CHECK: Symbol { +; CHECK: Name: t2 +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED (0) +; CHECK-NEXT: BaseType: Null +; CHECK-NEXT: ComplexType: Null +; CHECK-NEXT: StorageClass: WeakExternal +; CHECK-NEXT: AuxSymbolCount: 1 +; CHECK-NEXT: AuxWeakExternal { +; CHECK-NEXT: Linked: proc2 +; CHECK-NEXT: Search: Alias +; CHECK-NEXT: } +; CHECK-NEXT: } + +alias = +; CHECK: Symbol { +; CHECK: Name: t3 +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED (0) +; CHECK-NEXT: BaseType: Null +; CHECK-NEXT: ComplexType: Null +; CHECK-NEXT: StorageClass: WeakExternal +; CHECK-NEXT: AuxSymbolCount: 1 +; CHECK-NEXT: AuxWeakExternal { +; CHECK-NEXT: Linked: foo +; CHECK-NEXT: Search: Alias +; CHECK-NEXT: } +; CHECK-NEXT: } + +alias = +bar PROC + ret +bar ENDP + +; CHECK: Symbol { +; CHECK: Name: t4 +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED (0) +; CHECK-NEXT: BaseType: Null +; CHECK-NEXT: ComplexType: Null +; CHECK-NEXT: StorageClass: WeakExternal +; CHECK-NEXT: AuxSymbolCount: 1 +; CHECK-NEXT: AuxWeakExternal { +; CHECK-NEXT: Linked: bar +; CHECK-NEXT: Search: Alias +; CHECK-NEXT: } +; CHECK-NEXT: } + +alias = +; CHECK: Symbol { +; CHECK: Name: t5 +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: Section: IMAGE_SYM_UNDEFINED (0) +; CHECK-NEXT: BaseType: Null +; CHECK-NEXT: ComplexType: Null +; CHECK-NEXT: StorageClass: WeakExternal +; CHECK-NEXT: AuxSymbolCount: 1 +; CHECK-NEXT: AuxWeakExternal { +; CHECK-NEXT: Linked: t2 +; CHECK-NEXT: Search: Alias +; CHECK-NEXT: } +; CHECK-NEXT: } + +END diff --git a/llvm/test/tools/llvm-ml/alias_errors.test b/llvm/test/tools/llvm-ml/alias_errors.test new file mode 100644 index 0000000000000..9d51b2a993ac0 --- /dev/null +++ b/llvm/test/tools/llvm-ml/alias_errors.test @@ -0,0 +1,36 @@ +; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s + +.code + +foo PROC + ret +foo ENDP + +bar PROC + ret +bar ENDP + +t1: +alias foo = bar +alias foo = +alias = bar + +; CHECK: error: expected +; CHECK: error: expected +; CHECK: error: expected + +t2: +alias +alias , + +; CHECK: error: unexpected token in alias directive +; CHECK: error: unexpected token in alias directive + +t3: +alias +alias + +END \ No newline at end of file diff --git a/llvm/test/tools/llvm-ml/proc.test b/llvm/test/tools/llvm-ml/proc.test index ad117f7fb1dde..15e253a310832 100644 --- a/llvm/test/tools/llvm-ml/proc.test +++ b/llvm/test/tools/llvm-ml/proc.test @@ -7,11 +7,6 @@ t1 PROC ret t1 ENDP -; CHECK: .def t1 -; CHECK-NEXT: .scl 2 -; CHECK-NEXT: .type 32 -; CHECK-NEXT: .endef - ; CHECK: t1: ; CHECK: ret diff --git a/llvm/test/tools/llvm-ml/proc_frame.test b/llvm/test/tools/llvm-ml/proc_frame.test index 3bf1c3a3ca4ba..f98721467474c 100644 --- a/llvm/test/tools/llvm-ml/proc_frame.test +++ b/llvm/test/tools/llvm-ml/proc_frame.test @@ -13,11 +13,6 @@ t1 PROC FRAME ret t1 ENDP -; CHECK: .def t1 -; CHECK-NEXT: .scl 2 -; CHECK-NEXT: .type 32 -; CHECK-NEXT: .endef - ; CHECK: .seh_proc t1 ; CHECK: t1: From feb74530f86516de211d8b91eab426fc39d1b3e8 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Tue, 29 Sep 2020 17:01:05 -0400 Subject: [PATCH 090/544] [ms] [llvm-ml] Accept whitespace around the dot operator MASM allows arbitrary whitespace around the Intel dot operator, especially when used for struct field lookup Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D88450 --- llvm/lib/MC/MCParser/MasmParser.cpp | 2 + .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 74 +++++++++++++++++-- llvm/test/tools/llvm-ml/dot_operator.test | 67 +++++++++++++++++ llvm/test/tools/llvm-ml/struct.test | 2 +- 4 files changed, 138 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/dot_operator.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 0d5d6a112902b..c574b8715b162 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -6728,6 +6728,8 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, Info.Type.Length = Field.LengthOf; if (Field.Contents.FT == FT_STRUCT) Info.Type.Name = Field.Contents.StructInfo.Structure.Name; + else + Info.Type.Name = ""; return false; } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 7a7c81000a2c1..1f594c54c4107 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1674,6 +1674,18 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (ParseIntelDotOperator(SM, End)) return true; break; + case AsmToken::Dot: + if (!Parser.isParsingMasm()) { + if ((Done = SM.isValidEndState())) + break; + return Error(Tok.getLoc(), "unknown token in expression"); + } + // MASM allows spaces around the dot operator (e.g., "var . x") + Lex(); + UpdateLocLex = false; + if (ParseIntelDotOperator(SM, End)) + return true; + break; case AsmToken::Dollar: if (!Parser.isParsingMasm()) { if ((Done = SM.isValidEndState())) @@ -1687,6 +1699,23 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); UpdateLocLex = false; + if (Parser.isParsingMasm()) { + size_t DotOffset = Identifier.find_first_of('.'); + if (DotOffset != StringRef::npos) { + consumeToken(); + StringRef LHS = Identifier.slice(0, DotOffset); + StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1); + StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos); + if (!RHS.empty()) { + getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS)); + } + getLexer().UnLex(AsmToken(AsmToken::Dot, Dot)); + if (!LHS.empty()) { + getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS)); + } + break; + } + } // (MASM only) PTR operator if (Parser.isParsingMasm()) { const AsmToken &NextTok = getLexer().peekTok(); @@ -1744,7 +1773,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { } // Symbol reference, when parsing assembly content InlineAsmIdentifierInfo Info; - AsmTypeInfo Type; + AsmFieldInfo FieldInfo; const MCExpr *Val; if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { // MS Dot Operator expression @@ -1761,8 +1790,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { if (SM.onInteger(Val, ErrMsg)) return Error(IdentLoc, ErrMsg); - } else + } else { return true; + } break; } // MS InlineAsm identifier @@ -1771,7 +1801,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return Error(IdentLoc, "expected identifier"); if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) return true; - else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, true, ErrMsg)) + else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, + true, ErrMsg)) return Error(IdentLoc, ErrMsg); break; } @@ -1784,11 +1815,35 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return Error(IdentLoc, ErrMsg); break; } + if (!getParser().lookUpType(Identifier, FieldInfo.Type)) { + // Field offset immediate; . + Lex(); // eat type + bool EndDot = parseOptionalToken(AsmToken::Dot); + while (EndDot || (getTok().is(AsmToken::Identifier) && + getTok().getString().startswith("."))) { + getParser().parseIdentifier(Identifier); + if (!EndDot) + Identifier.consume_front("."); + EndDot = Identifier.consume_back("."); + if (getParser().lookUpField(FieldInfo.Type.Name, Identifier, + FieldInfo)) { + SMLoc IDEnd = + SMLoc::getFromPointer(Identifier.data() + Identifier.size()); + return Error(IdentLoc, "Unable to lookup field reference!", + SMRange(IdentLoc, IDEnd)); + } + if (!EndDot) + EndDot = parseOptionalToken(AsmToken::Dot); + } + if (SM.onInteger(FieldInfo.Offset, ErrMsg)) + return Error(IdentLoc, ErrMsg); + break; + } } - if (getParser().parsePrimaryExpr(Val, End, &Type)) { + if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) { return Error(Tok.getLoc(), "Unexpected identifier!"); - } else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, false, - ErrMsg)) { + } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, + false, ErrMsg)) { return Error(IdentLoc, ErrMsg); } break; @@ -2006,6 +2061,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, StringRef DotDispStr = Tok.getString(); if (DotDispStr.startswith(".")) DotDispStr = DotDispStr.drop_front(1); + StringRef TrailingDot; // .Imm gets lexed as a real. if (Tok.is(AsmToken::Real)) { @@ -2014,6 +2070,10 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, Info.Offset = DotDisp.getZExtValue(); } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) && Tok.is(AsmToken::Identifier)) { + if (DotDispStr.endswith(".")) { + TrailingDot = DotDispStr.substr(DotDispStr.size() - 1); + DotDispStr = DotDispStr.drop_back(1); + } const std::pair BaseMember = DotDispStr.split('.'); const StringRef Base = BaseMember.first, Member = BaseMember.second; if (getParser().lookUpField(SM.getType(), DotDispStr, Info) && @@ -2031,6 +2091,8 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); while (Tok.getLoc().getPointer() < DotExprEndLoc) Lex(); + if (!TrailingDot.empty()) + getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot)); SM.addImm(Info.Offset); SM.setTypeInfo(Info.Type); return false; diff --git a/llvm/test/tools/llvm-ml/dot_operator.test b/llvm/test/tools/llvm-ml/dot_operator.test new file mode 100644 index 0000000000000..bbea6152e8db4 --- /dev/null +++ b/llvm/test/tools/llvm-ml/dot_operator.test @@ -0,0 +1,67 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +FOO STRUCT + a BYTE ? + b BYTE ? + c BYTE ? + d BYTE ? +FOO ENDS + +BAR STRUCT + e WORD ? + f WORD ? +BAR ENDS + +var FOO <> + +.code + +t1: +mov al, var.a +mov al, var. b +mov al, var .c +mov al, var . d + +; CHECK-LABEL: t1: +; CHECK: mov al, byte ptr [rip + var] +; CHECK: mov al, byte ptr [rip + var+1] +; CHECK: mov al, byte ptr [rip + var+2] +; CHECK: mov al, byte ptr [rip + var+3] + +t2: +mov eax, FOO.a +mov ax, FOO. b +mov al, FOO .c +mov eax, FOO . d + +; CHECK-LABEL: t2: +; CHECK: mov eax, 0 +; CHECK: mov ax, 1 +; CHECK: mov al, 2 +; CHECK: mov eax, 3 + +t3: +mov al, BYTE PTR var[FOO.c] + +; CHECK-LABEL: t3: +; CHECK: mov al, byte ptr [rip + var+2] + +t4: +mov ax, var.BAR.f +mov ax, var .BAR.f +mov ax, var. BAR.f +mov ax, var.BAR .f +mov ax, var.BAR. f +mov ax, var . BAR . f + +; CHECK-LABEL: t4: +; CHECK: mov ax, word ptr [rip + var+2] +; CHECK: mov ax, word ptr [rip + var+2] +; CHECK: mov ax, word ptr [rip + var+2] +; CHECK: mov ax, word ptr [rip + var+2] +; CHECK: mov ax, word ptr [rip + var+2] +; CHECK: mov ax, word ptr [rip + var+2] + +END diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test index facd7c14e4f4d..479d31c8121f4 100644 --- a/llvm/test/tools/llvm-ml/struct.test +++ b/llvm/test/tools/llvm-ml/struct.test @@ -140,7 +140,7 @@ mov al, [t2.FOOBAR.e.b] ; CHECK-NEXT: mov al, byte ptr [rip + t2+9] ; CHECK-NEXT: mov al, byte ptr [rip + t2+9] ; CHECK-NEXT: mov al, byte ptr [rip + t2+9] -; CHECK-NEXT: mov al, byte ptr [rip + (t2+8)+1] +; CHECK-NEXT: mov al, byte ptr [rip + t2+9] ; CHECK-NEXT: mov al, byte ptr [rip + t2+9] QUUX STRUCT From 13f701b99ca1e0786258ef05b0439d4893aac8cb Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Tue, 29 Sep 2020 17:02:18 -0400 Subject: [PATCH 091/544] [ms] [llvm-ml] Create the @feat.00 symbol, and populate it appropriately @feat.00 is a bitfield read by Microsoft-style linkers, and is required to signal (e.g.) /safeseh support on 32-bit systems. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D88451 --- llvm/test/tools/llvm-ml/feat00.test | 24 +++++++++++++++ llvm/test/tools/llvm-ml/feat00_override.test | 21 +++++++++++++ llvm/tools/llvm-ml/llvm-ml.cpp | 31 +++++++++++++++++++- 3 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-ml/feat00.test create mode 100644 llvm/test/tools/llvm-ml/feat00_override.test diff --git a/llvm/test/tools/llvm-ml/feat00.test b/llvm/test/tools/llvm-ml/feat00.test new file mode 100644 index 0000000000000..1b5070e8c86aa --- /dev/null +++ b/llvm/test/tools/llvm-ml/feat00.test @@ -0,0 +1,24 @@ +; RUN: llvm-ml -m32 -filetype=obj %s | llvm-readobj --syms - | FileCheck %s --check-prefix=CHECK-OBJ --check-prefix=CHECK-OBJ-NOSAFESEH +; RUN: llvm-ml -m64 -filetype=obj %s | llvm-readobj --syms - | FileCheck %s --check-prefix=CHECK-OBJ --check-prefix=CHECK-OBJ-NOSAFESEH + +; RUN: llvm-ml -m32 -safeseh -filetype=obj %s | llvm-readobj --syms - | FileCheck %s --check-prefix=CHECK-OBJ --check-prefix=CHECK-OBJ-SAFESEH +; RUN: llvm-ml -m64 -safeseh -filetype=obj %s -o %t.obj 2>&1 | FileCheck %s --check-prefix=CHECK-SAFESEH64 +; RUN: llvm-readobj --syms %t.obj | FileCheck %s --check-prefix=CHECK-OBJ --check-prefix=CHECK-OBJ-NOSAFESEH + +; CHECK-SAFESEH64: warning: /safeseh applies only to 32-bit X86 platforms; ignoring. + +.code +noop: + ret +end + +; CHECK-OBJ: Symbol { +; CHECK-OBJ: Name: @feat.00 +; CHECK-OBJ-NOSAFESEH: Value: 2 +; CHECK-OBJ-SAFESEH: Value: 3 +; CHECK-OBJ-NEXT: Section: IMAGE_SYM_ABSOLUTE +; CHECK-OBJ-NEXT: BaseType: Null +; CHECK-OBJ-NEXT: ComplexType: Null +; CHECK-OBJ-NEXT: StorageClass: External +; CHECK-OBJ-NEXT: AuxSymbolCount: 0 +; CHECK-OBJ-NEXT: } diff --git a/llvm/test/tools/llvm-ml/feat00_override.test b/llvm/test/tools/llvm-ml/feat00_override.test new file mode 100644 index 0000000000000..358fc85660819 --- /dev/null +++ b/llvm/test/tools/llvm-ml/feat00_override.test @@ -0,0 +1,21 @@ +; RUN: llvm-ml -m32 -filetype=obj %s | llvm-readobj --syms - | FileCheck %s +; RUN: llvm-ml -m64 -filetype=obj %s | llvm-readobj --syms - | FileCheck %s +; RUN: llvm-ml -m32 -safeseh -filetype=obj %s | llvm-readobj --syms - | FileCheck %s + +.code + +@feat.00 = 99 + +noop: + ret +end + +; CHECK: Symbol { +; CHECK: Name: @feat.00 +; CHECK: Value: 99 +; CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE +; CHECK-NEXT: BaseType: Null +; CHECK-NEXT: ComplexType: Null +; CHECK-NEXT: StorageClass: Static +; CHECK-NEXT: AuxSymbolCount: 0 +; CHECK-NEXT: } diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 3a3984286e4af..1586870e0855d 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -99,6 +99,12 @@ cl::opt Bitness(cl::desc("Choose bitness:"), cl::init(m64), cl::values(clEnumVal(m32, "32-bit"), clEnumVal(m64, "64-bit (default)"))); +static cl::opt SafeSEH( + "safeseh", + cl::desc("Mark resulting object files as either containing no " + "exception handlers or containing exception handlers that " + "are all declared with .SAFESEH. Only available in 32-bit.")); + static cl::opt TripleName("triple", cl::desc("Target triple to assemble for, " "see -version for available targets")); @@ -195,7 +201,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, MCAsmInfo &MAI, MCSubtargetInfo &STI, MCInstrInfo &MCII, MCTargetOptions &MCOptions) { std::unique_ptr Parser( - createMCMasmParser(SrcMgr, Ctx, Str, MAI)); + createMCMasmParser(SrcMgr, Ctx, Str, MAI, 0)); std::unique_ptr TAP( TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); @@ -240,6 +246,12 @@ int main(int argc, char **argv) { // construct the Triple object. Triple TheTriple(TripleName); + if (SafeSEH && !(TheTriple.isArch32Bit() && TheTriple.isX86())) { + WithColor::warning() + << "/safeseh applies only to 32-bit X86 platforms; ignoring.\n"; + SafeSEH = false; + } + ErrorOr> BufferPtr = MemoryBuffer::getFileOrSTDIN(InputFilename); if (std::error_code EC = BufferPtr.getError()) { @@ -354,6 +366,23 @@ int main(int argc, char **argv) { /*DWARFMustBeAtTheEnd*/ false)); } + if (TheTriple.isOSBinFormatCOFF()) { + // Emit an absolute @feat.00 symbol. This is a features bitfield read by + // link.exe. + int64_t Feat00Flags = 0x2; + if (SafeSEH) { + // According to the PE-COFF spec, the LSB of this value marks the object + // for "registered SEH". This means that all SEH handler entry points + // must be registered in .sxdata. Use of any unregistered handlers will + // cause the process to terminate immediately. + Feat00Flags |= 0x1; + } + MCSymbol *Feat00Sym = Ctx.getOrCreateSymbol("@feat.00"); + Feat00Sym->setRedefinable(true); + Str->emitSymbolAttribute(Feat00Sym, MCSA_Global); + Str->emitAssignment(Feat00Sym, MCConstantExpr::create(Feat00Flags, Ctx)); + } + // Use Assembler information for parsing. Str->setUseAssemblerInfoForParsing(true); From 80381c4dc92572f54774041f0ad6786112360738 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Tue, 29 Sep 2020 16:20:33 -0500 Subject: [PATCH 092/544] [SVE] Lower fixed length VECREDUCE_[FMAX|FMIN] to Scalable Differential Revision: https://reviews.llvm.org/D88444 --- .../Target/AArch64/AArch64ISelLowering.cpp | 10 +- .../AArch64/sve-fixed-length-fp-reduce.ll | 445 ++++++++++++++++++ 2 files changed, 453 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fceb98d445ff6..578bf1560d019 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1223,6 +1223,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::UMAX, VT, Custom); setOperationAction(ISD::UMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); @@ -9662,8 +9664,8 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, // Try to lower fixed length reductions to SVE. EVT SrcVT = Src.getValueType(); - bool OverrideNEON = SrcVT.getVectorElementType() == MVT::i64 && - Op.getOpcode() != ISD::VECREDUCE_ADD; + bool OverrideNEON = Op.getOpcode() != ISD::VECREDUCE_ADD && + SrcVT.getVectorElementType() == MVT::i64; if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { switch (Op.getOpcode()) { case ISD::VECREDUCE_ADD: @@ -9676,6 +9678,10 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, return LowerFixedLengthReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG); case ISD::VECREDUCE_UMIN: return LowerFixedLengthReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG); + case ISD::VECREDUCE_FMAX: + return LowerFixedLengthReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG); + case ISD::VECREDUCE_FMIN: + return LowerFixedLengthReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG); default: llvm_unreachable("Unhandled fixed length reduction"); } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll new file mode 100644 index 0000000000000..68501a797178a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll @@ -0,0 +1,445 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; FMAXV +; + +; No NEON 16-bit vector FMAXNMV support. Use SVE. +define half @fmaxv_v4f16(<4 x half> %a) #0 { +; CHECK-LABEL: fmaxv_v4f16: +; CHECK: fmaxnmv h0, v0.4h +; CHECK-NEXT: ret + %res = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a) + ret half %res +} + +; No NEON 16-bit vector FMAXNMV support. Use SVE. +define half @fmaxv_v8f16(<8 x half> %a) #0 { +; CHECK-LABEL: fmaxv_v8f16: +; CHECK: fmaxnmv h0, v0.8h +; CHECK-NEXT: ret + %res = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %a) + ret half %res +} + +define half @fmaxv_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: fmaxv_v16f16: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_GE_256-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: fmaxnmv h0, [[PG]], [[OP]].h +; VBITS_GE_256-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %op) + ret half %res +} + +define half @fmaxv_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: fmaxv_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: fmaxnmv h0, [[PG]], [[OP]].h +; VBITS_GE_512-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmax.v32f16(<32 x half> %op) + ret half %res +} + +define half @fmaxv_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: fmaxv_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: fmaxnmv h0, [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmax.v64f16(<64 x half> %op) + ret half %res +} + +define half @fmaxv_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: fmaxv_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: fmaxnmv h0, [[PG]], [[OP]].h +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmax.v128f16(<128 x half> %op) + ret half %res +} + +; Don't use SVE for 64-bit f32 vectors. +define float @fmaxv_v2f32(<2 x float> %a) #0 { +; CHECK-LABEL: fmaxv_v2f32: +; CHECK: fmaxnmp s0, v0.2s +; CHECK: ret + %res = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a) + ret float %res +} + +; Don't use SVE for 128-bit f32 vectors. +define float @fmaxv_v4f32(<4 x float> %a) #0 { +; CHECK-LABEL: fmaxv_v4f32: +; CHECK: fmaxnmv s0, v0.4s +; CHECK: ret + %res = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a) + ret float %res +} + +define float @fmaxv_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: fmaxv_v8f32: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_GE_256-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: fmaxnmv s0, [[PG]], [[OP]].s +; VBITS_GE_256-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %op) + ret float %res +} + +define float @fmaxv_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: fmaxv_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: fmaxnmv s0, [[PG]], [[OP]].s +; VBITS_GE_512-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %op) + ret float %res +} + +define float @fmaxv_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: fmaxv_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: fmaxnmv s0, [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> %op) + ret float %res +} + +define float @fmaxv_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: fmaxv_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: fmaxnmv s0, [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmax.v64f32(<64 x float> %op) + ret float %res +} + +; Nothing to do for single element vectors. +define double @fmaxv_v1f64(<1 x double> %a) #0 { +; CHECK-LABEL: fmaxv_v1f64: +; CHECK-NOT: fmax +; CHECK: ret + %res = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a) + ret double %res +} + +; Don't use SVE for 128-bit f64 vectors. +define double @fmaxv_v2f64(<2 x double> %a) #0 { +; CHECK-LABEL: fmaxv_v2f64: +; CHECK: fmaxnmp d0, v0.2d +; CHECK-NEXT: ret + %res = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a) + ret double %res +} + +define double @fmaxv_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: fmaxv_v4f64: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_GE_256-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: fmaxnmv d0, [[PG]], [[OP]].d +; VBITS_GE_256-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %op) + ret double %res +} + +define double @fmaxv_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: fmaxv_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: fmaxnmv d0, [[PG]], [[OP]].d +; VBITS_GE_512-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %op) + ret double %res +} + +define double @fmaxv_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: fmaxv_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: fmaxnmv d0, [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %op) + ret double %res +} + +define double @fmaxv_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: fmaxv_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: fmaxnmv d0, [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmax.v32f64(<32 x double> %op) + ret double %res +} + +; +; FMINV +; + +; No NEON 16-bit vector FMINNMV support. Use SVE. +define half @fminv_v4f16(<4 x half> %a) #0 { +; CHECK-LABEL: fminv_v4f16: +; CHECK: fminnmv h0, v0.4h +; CHECK-NEXT: ret + %res = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a) + ret half %res +} + +; No NEON 16-bit vector FMINNMV support. Use SVE. +define half @fminv_v8f16(<8 x half> %a) #0 { +; CHECK-LABEL: fminv_v8f16: +; CHECK: fminnmv h0, v0.8h +; CHECK-NEXT: ret + %res = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %a) + ret half %res +} + +define half @fminv_v16f16(<16 x half>* %a) #0 { +; CHECK-LABEL: fminv_v16f16: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_GE_256-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: fminnmv h0, [[PG]], [[OP]].h +; VBITS_GE_256-NEXT: ret + %op = load <16 x half>, <16 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %op) + ret half %res +} + +define half @fminv_v32f16(<32 x half>* %a) #0 { +; CHECK-LABEL: fminv_v32f16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: fminnmv h0, [[PG]], [[OP]].h +; VBITS_GE_512-NEXT: ret + %op = load <32 x half>, <32 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmin.v32f16(<32 x half> %op) + ret half %res +} + +define half @fminv_v64f16(<64 x half>* %a) #0 { +; CHECK-LABEL: fminv_v64f16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: fminnmv h0, [[PG]], [[OP]].h +; VBITS_GE_1024-NEXT: ret + %op = load <64 x half>, <64 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmin.v64f16(<64 x half> %op) + ret half %res +} + +define half @fminv_v128f16(<128 x half>* %a) #0 { +; CHECK-LABEL: fminv_v128f16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: fminnmv h0, [[PG]], [[OP]].h +; VBITS_GE_2048-NEXT: ret + %op = load <128 x half>, <128 x half>* %a + %res = call half @llvm.experimental.vector.reduce.fmin.v128f16(<128 x half> %op) + ret half %res +} + +; Don't use SVE for 64-bit f32 vectors. +define float @fminv_v2f32(<2 x float> %a) #0 { +; CHECK-LABEL: fminv_v2f32: +; CHECK: fminnmp s0, v0.2s +; CHECK: ret + %res = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a) + ret float %res +} + +; Don't use SVE for 128-bit f32 vectors. +define float @fminv_v4f32(<4 x float> %a) #0 { +; CHECK-LABEL: fminv_v4f32: +; CHECK: fminnmv s0, v0.4s +; CHECK: ret + %res = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a) + ret float %res +} + +define float @fminv_v8f32(<8 x float>* %a) #0 { +; CHECK-LABEL: fminv_v8f32: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_GE_256-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: fminnmv s0, [[PG]], [[OP]].s +; VBITS_GE_256-NEXT: ret + %op = load <8 x float>, <8 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %op) + ret float %res +} + +define float @fminv_v16f32(<16 x float>* %a) #0 { +; CHECK-LABEL: fminv_v16f32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: fminnmv s0, [[PG]], [[OP]].s +; VBITS_GE_512-NEXT: ret + %op = load <16 x float>, <16 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %op) + ret float %res +} + +define float @fminv_v32f32(<32 x float>* %a) #0 { +; CHECK-LABEL: fminv_v32f32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: fminnmv s0, [[PG]], [[OP]].s +; VBITS_GE_1024-NEXT: ret + %op = load <32 x float>, <32 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> %op) + ret float %res +} + +define float @fminv_v64f32(<64 x float>* %a) #0 { +; CHECK-LABEL: fminv_v64f32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: fminnmv s0, [[PG]], [[OP]].s +; VBITS_GE_2048-NEXT: ret + %op = load <64 x float>, <64 x float>* %a + %res = call float @llvm.experimental.vector.reduce.fmin.v64f32(<64 x float> %op) + ret float %res +} + +; Nothing to do for single element vectors. +define double @fminv_v1f64(<1 x double> %a) #0 { +; CHECK-LABEL: fminv_v1f64: +; CHECK-NOT: fmin +; CHECK: ret + %res = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a) + ret double %res +} + +; Don't use SVE for 128-bit f64 vectors. +define double @fminv_v2f64(<2 x double> %a) #0 { +; CHECK-LABEL: fminv_v2f64: +; CHECK: fminnmp d0, v0.2d +; CHECK-NEXT: ret + %res = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a) + ret double %res +} + +define double @fminv_v4f64(<4 x double>* %a) #0 { +; CHECK-LABEL: fminv_v4f64: +; VBITS_GE_256: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_GE_256-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_256-NEXT: fminnmv d0, [[PG]], [[OP]].d +; VBITS_GE_256-NEXT: ret + %op = load <4 x double>, <4 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %op) + ret double %res +} + +define double @fminv_v8f64(<8 x double>* %a) #0 { +; CHECK-LABEL: fminv_v8f64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-NEXT: fminnmv d0, [[PG]], [[OP]].d +; VBITS_GE_512-NEXT: ret + %op = load <8 x double>, <8 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %op) + ret double %res +} + +define double @fminv_v16f64(<16 x double>* %a) #0 { +; CHECK-LABEL: fminv_v16f64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-NEXT: fminnmv d0, [[PG]], [[OP]].d +; VBITS_GE_1024-NEXT: ret + %op = load <16 x double>, <16 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %op) + ret double %res +} + +define double @fminv_v32f64(<32 x double>* %a) #0 { +; CHECK-LABEL: fminv_v32f64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-NEXT: fminnmv d0, [[PG]], [[OP]].d +; VBITS_GE_2048-NEXT: ret + %op = load <32 x double>, <32 x double>* %a + %res = call double @llvm.experimental.vector.reduce.fmin.v32f64(<32 x double> %op) + ret double %res +} + +attributes #0 = { "target-features"="+sve" } + +declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>) +declare half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half>) +declare half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half>) +declare half @llvm.experimental.vector.reduce.fmax.v32f16(<32 x half>) +declare half @llvm.experimental.vector.reduce.fmax.v64f16(<64 x half>) +declare half @llvm.experimental.vector.reduce.fmax.v128f16(<128 x half>) + +declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float>) +declare float @llvm.experimental.vector.reduce.fmax.v64f32(<64 x float>) + +declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>) +declare double @llvm.experimental.vector.reduce.fmax.v32f64(<32 x double>) + +declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>) +declare half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half>) +declare half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half>) +declare half @llvm.experimental.vector.reduce.fmin.v32f16(<32 x half>) +declare half @llvm.experimental.vector.reduce.fmin.v64f16(<64 x half>) +declare half @llvm.experimental.vector.reduce.fmin.v128f16(<128 x half>) + +declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float>) +declare float @llvm.experimental.vector.reduce.fmin.v64f32(<64 x float>) + +declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>) +declare double @llvm.experimental.vector.reduce.fmin.v32f64(<32 x double>) From b5543063e1bfd6195a2d34d2c892466c0050e08a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 29 Sep 2020 16:30:07 -0500 Subject: [PATCH 093/544] [flang][msvc] Define implicit conversion from UnsignedInt128 to int64_t. The custom implementation of UnsignedInt128 has an implicit conversion operator to unit64_t, but not int64_t. Considering that the former is already truncating, and C++ implicitly converts uint64_t to int64_t, UnsignedInt128 should also support an implicit conversion to int64_t. An analogous conversion would be from uint32_t to int16_t. Without the conversion operator overload, the msvc emits the following error: ``` descriptor-io.h(44): error C2440: 'static_cast': cannot convert from 'A' to 'int64_t' with [ A=Fortran::common::uint128_t ] ``` This patch is part of the series to make flang compilable with MS Visual Studio . Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D88509 --- flang/include/flang/Common/uint128.h | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/include/flang/Common/uint128.h b/flang/include/flang/Common/uint128.h index eecf4a8ba1149..0ed3cf1f385d0 100644 --- a/flang/include/flang/Common/uint128.h +++ b/flang/include/flang/Common/uint128.h @@ -53,6 +53,7 @@ class UnsignedInt128 { constexpr bool operator!() const { return !low_ && !high_; } constexpr explicit operator bool() const { return low_ || high_; } constexpr explicit operator std::uint64_t() const { return low_; } + constexpr explicit operator std::int64_t() const { return low_; } constexpr explicit operator int() const { return static_cast(low_); } constexpr std::uint64_t high() const { return high_; } From bcd05599d0e53977a963799d6ee4f6e0bc21331b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 29 Sep 2020 16:57:05 -0500 Subject: [PATCH 094/544] [flang][msvc] Define access flags under Windows. NFC. The flags F_OK, R_OK and W_OK are defined in unistd.h, which does not exist under the Windows platform. Windows still defines the `access` function. Its access flags are documented at https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/access-waccess. For compatibility, define the flags F_OK, R_OK and W_OK using these constants. This patch is part of the series to make flang compilable with MS Visual Studio . Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D88508 --- flang/runtime/file.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp index 8fc81efe6c93c..fa59567f6c105 100644 --- a/flang/runtime/file.cpp +++ b/flang/runtime/file.cpp @@ -397,6 +397,15 @@ int OpenFile::PendingResult(const Terminator &terminator, int iostat) { bool IsATerminal(int fd) { return ::isatty(fd); } +#ifdef WIN32 +// Access flags are normally defined in unistd.h, which unavailable under +// Windows. Instead, define the flags as documented at +// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/access-waccess +#define F_OK 00 +#define W_OK 02 +#define R_OK 04 +#endif + bool IsExtant(const char *path) { return ::access(path, F_OK) == 0; } bool MayRead(const char *path) { return ::access(path, R_OK) == 0; } bool MayWrite(const char *path) { return ::access(path, W_OK) == 0; } From d256797c9035aebf0309489c04dc34f8bae49dc4 Mon Sep 17 00:00:00 2001 From: JonChesterfield Date: Tue, 29 Sep 2020 23:11:46 +0100 Subject: [PATCH 095/544] [nfc][libomptarget] Drop parameter to named_sync [nfc][libomptarget] Drop parameter to named_sync named_sync has one call site (in sync.cu) where it always passed L1_BARRIER. Folding this into the call site and dropping the macro is a simplification. amdgpu doesn't have ptx' bar.sync instruction. A correct implementation of __kmpc_impl_named_sync in terms of shared memory is much easier if it can assume that the barrier argument is this constant. Said implementation is left for a second patch. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D88474 --- .../libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 11 +++-------- openmp/libomptarget/deviceRTLs/common/src/sync.cu | 3 +-- .../libomptarget/deviceRTLs/nvptx/src/target_impl.h | 9 ++++----- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index 3c90b39282c94..8afc5e77996af 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -42,10 +42,6 @@ #define WARPSIZE 64 -// The named barrier for active parallel threads of a team in an L1 parallel -// region to synchronize with each other. -#define L1_BARRIER (1) - // Maximum number of preallocated arguments to an outlined parallel/simd // function. Anything more requires dynamic memory allocation. #define MAX_SHARED_ARGS 20 @@ -113,10 +109,9 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) { // AMDGCN doesn't need to sync threads in a warp } -INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { - // we have protected the master warp from releasing from its barrier - // due to a full workgroup barrier in the middle of a work function. - // So it is ok to issue a full workgroup barrier here. +INLINE void __kmpc_impl_named_sync(uint32_t num_threads) { + (void)num_threads; + // TODO: Implement on top of __SHARED__ __builtin_amdgcn_s_barrier(); } diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu index 3979e2054fc9e..824094cc3f787 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu @@ -60,8 +60,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) { PRINT(LD_SYNC, "call kmpc_barrier with %d omp threads, sync parameter %d\n", (int)numberOfActiveOMPThreads, (int)threads); - // Barrier #1 is for synchronization among active threads. - __kmpc_impl_named_sync(L1_BARRIER, threads); + __kmpc_impl_named_sync(threads); } } else { // Still need to flush the memory per the standard. diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index e3a3d0f56c4e7..f7bc7e14c5284 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -37,10 +37,6 @@ #define WARPSIZE 32 -// The named barrier for active parallel threads of a team in an L1 parallel -// region to synchronize with each other. -#define L1_BARRIER (1) - // Maximum number of preallocated arguments to an outlined parallel/simd function. // Anything more requires dynamic memory allocation. #define MAX_SHARED_ARGS 20 @@ -187,7 +183,10 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) { #endif // CUDA_VERSION } -INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { +INLINE void __kmpc_impl_named_sync(uint32_t num_threads) { + // The named barrier for active parallel threads of a team in an L1 parallel + // region to synchronize with each other. + int barrier = 1; asm volatile("bar.sync %0, %1;" : : "r"(barrier), "r"(num_threads) From bf434a5f173eed4112a10e28e8a6236d48f9da07 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Fri, 31 Jul 2020 15:03:21 -0700 Subject: [PATCH 096/544] Improve the representation of 's zero-only type. * Use an empty struct instead of a member pointer to represent this type, so that we don't actually pass a zero member pointer at runtime. * Mark the constructor as consteval to ensure that no code is emitted for it whenever possible. * Add a honeypot constructor to reject all non-int arguments, so that the only argument that can arrive at the real constructor is the literal 0. This results in better generated code, and rejecting invalid comparisons against nullptr, 0L, and so on, while also rejecting invalid comparisons against (1-1) and similar that would be allowed if we required an integer constant expression with value 0. Differential Revision: https://reviews.llvm.org/D85051 --- libcxx/include/__config | 6 ++ libcxx/include/compare | 9 ++- .../cmp.categories.pre/zero_type.verify.cpp | 60 +++++++++++++++++++ 3 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp diff --git a/libcxx/include/__config b/libcxx/include/__config index c29fd4267f323..1b87a6b439965 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -837,6 +837,12 @@ typedef unsigned int char32_t; # define _LIBCPP_CONSTEXPR constexpr #endif +#ifndef __cpp_consteval +# define _LIBCPP_CONSTEVAL _LIBCPP_CONSTEXPR +#else +# define _LIBCPP_CONSTEVAL consteval +#endif + #ifdef _LIBCPP_CXX03_LANG # define _LIBCPP_DEFAULT {} #else diff --git a/libcxx/include/compare b/libcxx/include/compare index 717859a1e3af5..c1cd81bb6fc1a 100644 --- a/libcxx/include/compare +++ b/libcxx/include/compare @@ -154,8 +154,13 @@ enum class _LIBCPP_ENUM_VIS _NCmpResult : signed char { __unordered = -127 }; -struct _CmpUnspecifiedType; -using _CmpUnspecifiedParam = void (_CmpUnspecifiedType::*)(); +struct _CmpUnspecifiedParam { + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEVAL + _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) {} + + template>> + _CmpUnspecifiedParam(_Tp) = delete; +}; class weak_equality { _LIBCPP_INLINE_VISIBILITY diff --git a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp new file mode 100644 index 0000000000000..40f6677d43c9b --- /dev/null +++ b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp @@ -0,0 +1,60 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// + +// Ensure we reject all cases where an argument other than a literal 0 is used +// for a comparison against a comparison category type. + +#include + +#define TEST_OP(v, op) \ + void(v op 0L); \ + void(0L op v); \ + void(v op nullptr); \ + void(nullptr op v); \ + void(v op(1 - 1)); \ + void((1 - 1) op v); + +template +void test_category(T v) { + TEST_OP(v, ==); // expected-error 18 {{}} + TEST_OP(v, !=); // expected-error 18 {{}} + TEST_OP(v, <); // expected-error 18 {{}} + TEST_OP(v, <=); // expected-error 18 {{}} + TEST_OP(v, >); // expected-error 18 {{}} + TEST_OP(v, >=); // expected-error 18 {{}} + TEST_OP(v, <=>); // expected-error 18 {{}} + + void(v == 0); + void(0 == v); + void(v != 0); + void(0 != v); + void(v < 0); + void(0 < v); + void(v <= 0); + void(0 <= v); + void(v > 0); + void(0 > v); + void(v >= 0); + void(0 >= v); +#ifndef _LIBCPP_HAS_NO_THREE_WAY_COMPARISON + void(v <=> 0); // expected-error 3 {{}} + void(0 <=> v); // expected-error 3 {{}} +#endif +} + +int main(int, char**) { + test_category(std::strong_ordering::equivalent); + test_category(std::weak_ordering::equivalent); + test_category(std::partial_ordering::equivalent); + return 0; +} From 1c604a9f5fd65c91f097c856fa6643373fc869e1 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 29 Sep 2020 15:20:11 -0700 Subject: [PATCH 097/544] Recognize setjmp and friends as builtins even if jmp_buf is not declared yet. This happens in glibc's headers. It's important that we recognize these functions so that we can mark them as returns_twice. Differential Revision: https://reviews.llvm.org/D88518 --- clang/include/clang/Basic/Builtins.def | 25 +++++++------- clang/include/clang/Basic/Builtins.h | 7 ++++ clang/lib/Sema/SemaDecl.cpp | 22 ++++++------- clang/test/CodeGen/setjmp.c | 44 +++++++++++++++++++++++++ clang/test/Sema/builtin-setjmp.c | 44 +++++++++++++++++++++---- clang/test/Sema/implicit-builtin-decl.c | 5 ++- 6 files changed, 115 insertions(+), 32 deletions(-) create mode 100644 clang/test/CodeGen/setjmp.c diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index efefe62c4a2c6..d001b0bea9e6d 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -75,6 +75,9 @@ // U -> pure // c -> const // t -> signature is meaningless, use custom typechecking +// T -> type is not important to semantic analysis and codegen; recognize as +// builtin even if type doesn't match signature, and don't warn if we +// can't be sure the type is right // F -> this is a libc/libm function with a '__builtin_' prefix added. // f -> this is a libc/libm function without the '__builtin_' prefix. It can // be followed by ':headername:' to state which header this function @@ -896,7 +899,7 @@ LANGBUILTIN(__va_start, "vc**.", "nt", ALL_MS_LANGUAGES) LANGBUILTIN(__fastfail, "vUi", "nr", ALL_MS_LANGUAGES) // Microsoft library builtins. -LIBBUILTIN(_setjmpex, "iJ", "fj", "setjmpex.h", ALL_MS_LANGUAGES) +LIBBUILTIN(_setjmpex, "iJ", "fjT", "setjmpex.h", ALL_MS_LANGUAGES) // C99 library functions // C99 stdarg.h @@ -990,8 +993,8 @@ LIBBUILTIN(wmemmove,"w*w*wC*z", "f", "wchar.h", ALL_LANGUAGES) // In some systems setjmp is a macro that expands to _setjmp. We undefine // it here to avoid having two identical LIBBUILTIN entries. #undef setjmp -LIBBUILTIN(setjmp, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(longjmp, "vJi", "fr", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(setjmp, "iJ", "fjT", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(longjmp, "vJi", "frT", "setjmp.h", ALL_LANGUAGES) // Non-C library functions, active in GNU mode only. // Functions with (returns_twice) attribute (marked as "j") are still active in @@ -1018,21 +1021,21 @@ LIBBUILTIN(strcasecmp, "icC*cC*", "f", "strings.h", ALL_GNU_LANGUAGES) LIBBUILTIN(strncasecmp, "icC*cC*z", "f", "strings.h", ALL_GNU_LANGUAGES) // POSIX unistd.h LIBBUILTIN(_exit, "vi", "fr", "unistd.h", ALL_GNU_LANGUAGES) -LIBBUILTIN(vfork, "p", "fj", "unistd.h", ALL_LANGUAGES) +LIBBUILTIN(vfork, "p", "fjT", "unistd.h", ALL_LANGUAGES) // POSIX pthread.h // FIXME: Should specify argument types. LIBBUILTIN(pthread_create, "", "fC<2,3>", "pthread.h", ALL_GNU_LANGUAGES) // POSIX setjmp.h -LIBBUILTIN(_setjmp, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(__sigsetjmp, "iSJi", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(sigsetjmp, "iSJi", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(savectx, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(getcontext, "iK*", "fj", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(_setjmp, "iJ", "fjT", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(__sigsetjmp, "iSJi", "fjT", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(sigsetjmp, "iSJi", "fjT", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(savectx, "iJ", "fjT", "setjmp.h", ALL_LANGUAGES) +LIBBUILTIN(getcontext, "iK*", "fjT", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(_longjmp, "vJi", "fr", "setjmp.h", ALL_GNU_LANGUAGES) -LIBBUILTIN(siglongjmp, "vSJi", "fr", "setjmp.h", ALL_GNU_LANGUAGES) +LIBBUILTIN(_longjmp, "vJi", "frT", "setjmp.h", ALL_GNU_LANGUAGES) +LIBBUILTIN(siglongjmp, "vSJi", "frT", "setjmp.h", ALL_GNU_LANGUAGES) // non-standard but very common LIBBUILTIN(strlcpy, "zc*cC*z", "f", "string.h", ALL_GNU_LANGUAGES) LIBBUILTIN(strlcat, "zc*cC*z", "f", "string.h", ALL_GNU_LANGUAGES) diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h index e4ed482d90688..15bfcf797917c 100644 --- a/clang/include/clang/Basic/Builtins.h +++ b/clang/include/clang/Basic/Builtins.h @@ -158,6 +158,13 @@ class Context { return strchr(getRecord(ID).Attributes, 't') != nullptr; } + /// Determines whether a declaration of this builtin should be recognized + /// even if the type doesn't match the specified signature. + bool allowTypeMismatch(unsigned ID) const { + return strchr(getRecord(ID).Attributes, 'T') != nullptr || + hasCustomTypechecking(ID); + } + /// Determines whether this builtin has a result or any arguments which /// are pointer types. bool hasPtrArgsOrResult(unsigned ID) const { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 1c3c484196803..c92d906580eba 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2105,7 +2105,8 @@ NamedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID, // If we have a builtin without an associated type we should not emit a // warning when we were not able to find a type for it. - if (Error == ASTContext::GE_Missing_type) + if (Error == ASTContext::GE_Missing_type || + Context.BuiltinInfo.allowTypeMismatch(ID)) return nullptr; // If we could not find a type for setjmp it is because the jmp_buf type was @@ -2129,11 +2130,9 @@ NamedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID, Context.BuiltinInfo.isHeaderDependentFunction(ID))) { Diag(Loc, diag::ext_implicit_lib_function_decl) << Context.BuiltinInfo.getName(ID) << R; - if (Context.BuiltinInfo.getHeaderName(ID) && - !Diags.isIgnored(diag::ext_implicit_lib_function_decl, Loc)) + if (const char *Header = Context.BuiltinInfo.getHeaderName(ID)) Diag(Loc, diag::note_include_header_or_declare) - << Context.BuiltinInfo.getHeaderName(ID) - << Context.BuiltinInfo.getName(ID); + << Header << Context.BuiltinInfo.getName(ID); } if (R.isNull()) @@ -9642,17 +9641,16 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } } - // In C builtins get merged with implicitly lazily created declarations. - // In C++ we need to check if it's a builtin and add the BuiltinAttr here. - if (getLangOpts().CPlusPlus && + // If this is the first declaration of a library builtin function, add + // attributes as appropriate. + if (!D.isRedeclaration() && NewFD->getDeclContext()->getRedeclContext()->isFileContext()) { if (IdentifierInfo *II = Previous.getLookupName().getAsIdentifierInfo()) { if (unsigned BuiltinID = II->getBuiltinID()) { if (NewFD->getLanguageLinkage() == CLanguageLinkage) { - // Declarations for builtins with custom typechecking by definition - // don't make sense. Don't attempt typechecking and simply add the - // attribute. - if (Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) { + // Validate the type matches unless this builtin is specified as + // matching regardless of its declared type. + if (Context.BuiltinInfo.allowTypeMismatch(BuiltinID)) { NewFD->addAttr(BuiltinAttr::CreateImplicit(Context, BuiltinID)); } else { ASTContext::GetBuiltinTypeError Error; diff --git a/clang/test/CodeGen/setjmp.c b/clang/test/CodeGen/setjmp.c new file mode 100644 index 0000000000000..4ca360d8584cd --- /dev/null +++ b/clang/test/CodeGen/setjmp.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -x c %s -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ %s -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s + +#ifdef __cplusplus +extern "C" { +#endif + +struct __jmp_buf_tag { int n; }; +int setjmp(struct __jmp_buf_tag*); +int sigsetjmp(struct __jmp_buf_tag*, int); +int _setjmp(struct __jmp_buf_tag*); +int __sigsetjmp(struct __jmp_buf_tag*, int); + +typedef struct __jmp_buf_tag jmp_buf[1]; +typedef struct __jmp_buf_tag sigjmp_buf[1]; + +#ifdef __cplusplus +} +#endif + +void f() { + jmp_buf jb; + // CHECK: call {{.*}}@setjmp( + setjmp(jb); + // CHECK: call {{.*}}@sigsetjmp( + sigsetjmp(jb, 0); + // CHECK: call {{.*}}@_setjmp( + _setjmp(jb); + // CHECK: call {{.*}}@__sigsetjmp( + __sigsetjmp(jb, 0); +} + +// CHECK: ; Function Attrs: returns_twice +// CHECK-NEXT: declare {{.*}} @setjmp( + +// CHECK: ; Function Attrs: returns_twice +// CHECK-NEXT: declare {{.*}} @sigsetjmp( + +// CHECK: ; Function Attrs: returns_twice +// CHECK-NEXT: declare {{.*}} @_setjmp( + +// CHECK: ; Function Attrs: returns_twice +// CHECK-NEXT: declare {{.*}} @__sigsetjmp( + diff --git a/clang/test/Sema/builtin-setjmp.c b/clang/test/Sema/builtin-setjmp.c index f8770d88e731f..6a114fad05d9d 100644 --- a/clang/test/Sema/builtin-setjmp.c +++ b/clang/test/Sema/builtin-setjmp.c @@ -1,10 +1,42 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s -ast-dump | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DWRONG_JMP_BUF %s -ast-dump | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DRIGHT_JMP_BUF %s -ast-dump | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DONLY_JMP_BUF %s -ast-dump | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_SETJMP %s -ast-dump 2>&1 | FileCheck %s #ifdef NO_JMP_BUF -extern long setjmp(long *); // expected-warning {{declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header .}} -#else +// This happens in some versions of glibc: the declaration of __sigsetjmp +// precedes the declaration of sigjmp_buf. +extern long setjmp(long *); // Can't check, so we trust that this is the right type +// FIXME: We could still diagnose the missing `jmp_buf` at the point of the call. +// expected-no-diagnostics +#elif WRONG_JMP_BUF typedef long jmp_buf; -extern int setjmp(char); // expected-warning@8 {{incompatible redeclaration of library function 'setjmp'}} - // expected-note@8 {{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}} +extern int setjmp(char); // expected-warning {{incompatible redeclaration of library function 'setjmp'}} + // expected-note@-1 {{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}} +#elif RIGHT_JMP_BUF +typedef long jmp_buf; +extern int setjmp(long); // OK, right type. +// expected-no-diagnostics +#elif ONLY_JMP_BUF +typedef int *jmp_buf; #endif + +void use() { + setjmp(0); + #ifdef NO_SETJMP + // expected-warning@-2 {{implicit declaration of function 'setjmp' is invalid in C99}} + #elif ONLY_JMP_BUF + // expected-warning@-4 {{implicitly declaring library function 'setjmp' with type 'int (jmp_buf)' (aka 'int (int *)')}} + // expected-note@-5 {{include the header or explicitly provide a declaration for 'setjmp'}} + #endif + + #ifdef NO_SETJMP + // In this case, the regular AST dump doesn't dump the implicit declaration of 'setjmp'. + #pragma clang __debug dump setjmp + #endif +} + +// CHECK: FunctionDecl {{.*}} used setjmp +// CHECK: BuiltinAttr {{.*}} Implicit +// CHECK: ReturnsTwiceAttr {{.*}} Implicit diff --git a/clang/test/Sema/implicit-builtin-decl.c b/clang/test/Sema/implicit-builtin-decl.c index b25e86bc03a33..9434b507a3af3 100644 --- a/clang/test/Sema/implicit-builtin-decl.c +++ b/clang/test/Sema/implicit-builtin-decl.c @@ -54,13 +54,12 @@ main(int argc, char *argv[]) void snprintf() { } -// PR8316 & PR40692 -void longjmp(); // expected-warning{{declaration of built-in function 'longjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header .}} +void longjmp(); extern float fmaxf(float, float); struct __jmp_buf_tag {}; -void sigsetjmp(struct __jmp_buf_tag[1], int); // expected-warning{{declaration of built-in function 'sigsetjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header .}} +void sigsetjmp(struct __jmp_buf_tag[1], int); // PR40692 void pthread_create(); // no warning expected From 61b3106965d7c4c696d8e4cd272273011bb50bc0 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 29 Sep 2020 15:32:04 -0700 Subject: [PATCH 098/544] [AMDGPU] Remove SIEncodingFamily.GFX10_B It turns to be not needed anymore. Differential Revision: https://reviews.llvm.org/D88520 --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7fdbe2afa033c..78c095d608a3e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -28,7 +28,6 @@ def SIEncodingFamily { int GFX9 = 5; int GFX10 = 6; int SDWA10 = 7; - int GFX10_B = 8; } //===----------------------------------------------------------------------===// @@ -2500,8 +2499,7 @@ def getMCOpcodeGen : InstrMapping { [!cast(SIEncodingFamily.GFX80)], [!cast(SIEncodingFamily.GFX9)], [!cast(SIEncodingFamily.GFX10)], - [!cast(SIEncodingFamily.SDWA10)], - [!cast(SIEncodingFamily.GFX10_B)]]; + [!cast(SIEncodingFamily.SDWA10)]]; } // Get equivalent SOPK instruction. From 2f95c50a8b713970c5134dabc246270111a48c6d Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 29 Sep 2020 16:01:25 -0700 Subject: [PATCH 099/544] Fix use of wrong printf format specifier for size_t argument. This causes a build break under -Werror=format. --- lldb/bindings/python/python-wrapper.swig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index c00deba6073b4..443ddfb8dd204 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -521,7 +521,7 @@ LLDBSwigPythonCreateScriptedStopHook size_t num_args = (*args_info).max_positional_args; if (num_args != 2) { error.SetErrorStringWithFormat("Wrong number of args for " - "handle_stop callback, should be 2 (excluding self), got: %d", + "handle_stop callback, should be 2 (excluding self), got: %zu", num_args); Py_RETURN_NONE; } else From 26ee8aff2b85ee28a2b2d0b1860d878b512fbdef Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 29 Sep 2020 16:32:06 -0700 Subject: [PATCH 100/544] [CodeExtractor] Don't create bitcasts when inserting lifetime markers (NFCI) Lifetime marker intrinsics support any pointer type, so CodeExtractor does not need to bitcast to `i8*` in order to use these markers. --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 35 +++++-------------- .../PartialInlineInvokeProducesOutVal.ll | 5 ++- .../lifetime-markers-on-inputs-1.ll | 8 ++--- .../lifetime-markers-on-inputs-2.ll | 15 ++++---- .../HotColdSplit/split-phis-in-exit-blocks.ll | 3 +- 5 files changed, 21 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index b940f2e710958..73201106c4e4c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1024,32 +1024,21 @@ static void insertLifetimeMarkersSurroundingCall( Module *M, ArrayRef LifetimesStart, ArrayRef LifetimesEnd, CallInst *TheCall) { LLVMContext &Ctx = M->getContext(); - auto Int8PtrTy = Type::getInt8PtrTy(Ctx); auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); Instruction *Term = TheCall->getParent()->getTerminator(); - // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts - // needed to satisfy this requirement so they may be reused. - DenseMap Bitcasts; - // Emit lifetime markers for the pointers given in \p Objects. Insert the // markers before the call if \p InsertBefore, and after the call otherwise. - auto insertMarkers = [&](Function *MarkerFunc, ArrayRef Objects, + auto insertMarkers = [&](Intrinsic::ID IID, ArrayRef Objects, bool InsertBefore) { for (Value *Mem : Objects) { assert((!isa(Mem) || cast(Mem)->getFunction() == TheCall->getFunction()) && "Input memory not defined in original function"); - Value *&MemAsI8Ptr = Bitcasts[Mem]; - if (!MemAsI8Ptr) { - if (Mem->getType() == Int8PtrTy) - MemAsI8Ptr = Mem; - else - MemAsI8Ptr = - CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); - } - - auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr}); + assert(Mem->getType()->isPointerTy() && "Expected pointer to memory"); + Function *MarkerFunc = + llvm::Intrinsic::getDeclaration(M, IID, Mem->getType()); + auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, Mem}); if (InsertBefore) Marker->insertBefore(TheCall); else @@ -1057,17 +1046,9 @@ static void insertLifetimeMarkersSurroundingCall( } }; - if (!LifetimesStart.empty()) { - auto StartFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_start, Int8PtrTy); - insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true); - } - - if (!LifetimesEnd.empty()) { - auto EndFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_end, Int8PtrTy); - insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false); - } + insertMarkers(Intrinsic::lifetime_start, LifetimesStart, + /*InsertBefore=*/true); + insertMarkers(Intrinsic::lifetime_end, LifetimesEnd, /*InsertBefore=*/false); } /// emitCallAndSwitchStatement - This method sets up the caller side by adding diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll index 2e0fbf6073ea7..32013579f1844 100644 --- a/llvm/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll +++ b/llvm/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll @@ -26,11 +26,10 @@ bb5: ; preds = %bb4, %bb1, %bb ; CHECK-LABEL: bb: ; CHECK-NEXT: [[CALL26LOC:%.*]] = alloca i8* ; CHECK-LABEL: codeRepl.i: -; CHECK-NEXT: %lt.cast.i = bitcast i8** [[CALL26LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast.i) +; CHECK-NEXT: call void @llvm.lifetime.start.p0p0i8(i64 -1, i8** [[CALL26LOC]]) ; CHECK-NEXT: call void @bar.1.bb1(i8** [[CALL26LOC]]) ; CHECK-NEXT: %call26.reload.i = load i8*, i8** [[CALL26LOC]] -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast.i) +; CHECK-NEXT: call void @llvm.lifetime.end.p0p0i8(i64 -1, i8** [[CALL26LOC]]) define i8* @dummy_caller(i32 %arg) { bb: %tmp = tail call i8* @bar(i32 %arg) diff --git a/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll b/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll index 6d9214482c8ce..d8afa44d514ff 100644 --- a/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll +++ b/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll @@ -29,10 +29,8 @@ normalPath: ret void ; CHECK-LABEL: codeRepl: -; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) -; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK: call void @llvm.lifetime.start.p0i256(i64 -1, i256* %local1) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i256(i64 -1, i256* %local2) ; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) ; CHECK-NEXT: br i1 @@ -61,4 +59,4 @@ outlinedPathExit: } ; CHECK-LABEL: define {{.*}}@foo.cold.1( -; CHECK-NOT: @llvm.lifetime +; CHECK-NOT: call void @llvm.lifetime diff --git a/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll b/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll index e0df965632abf..3d5a3bb8636ac 100644 --- a/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll +++ b/llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll @@ -37,13 +37,12 @@ declare void @use(i8*) define void @only_lifetime_start_is_cold() { ; CHECK-LABEL: @only_lifetime_start_is_cold( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256, align 8 ; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* ; CHECK-NEXT: br i1 undef, label [[CODEREPL:%.*]], label [[NO_EXTRACT1:%.*]] ; CHECK: codeRepl: -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @only_lifetime_start_is_cold.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i256(i64 -1, i256* [[LOCAL1]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @only_lifetime_start_is_cold.cold.1(i8* [[LOCAL1_CAST]]) [[ATTR3:#.*]] ; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[NO_EXTRACT1]], label [[EXIT:%.*]] ; CHECK: no-extract1: ; CHECK-NEXT: br label [[EXIT]] @@ -98,7 +97,7 @@ exit: define void @only_lifetime_end_is_cold() { ; CHECK-LABEL: @only_lifetime_end_is_cold( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256, align 8 ; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[LOCAL1_CAST]]) ; CHECK-NEXT: br i1 undef, label [[NO_EXTRACT1:%.*]], label [[CODEREPL:%.*]] @@ -106,7 +105,7 @@ define void @only_lifetime_end_is_cold() { ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[LOCAL1_CAST]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: codeRepl: -; CHECK-NEXT: call void @only_lifetime_end_is_cold.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: call void @only_lifetime_end_is_cold.cold.1(i8* [[LOCAL1_CAST]]) [[ATTR3]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -138,7 +137,7 @@ exit: define void @do_not_lift_lifetime_end() { ; CHECK-LABEL: @do_not_lift_lifetime_end( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256, align 8 ; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[LOCAL1_CAST]]) ; CHECK-NEXT: br label [[HEADER:%.*]] @@ -146,7 +145,7 @@ define void @do_not_lift_lifetime_end() { ; CHECK-NEXT: call void @use(i8* [[LOCAL1_CAST]]) ; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[CODEREPL:%.*]] ; CHECK: codeRepl: -; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @do_not_lift_lifetime_end.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @do_not_lift_lifetime_end.cold.1(i8* [[LOCAL1_CAST]]) [[ATTR3]] ; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[HEADER]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll b/llvm/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll index 2f5360ccb1e7e..0222c57fc6688 100644 --- a/llvm/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll +++ b/llvm/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll @@ -12,8 +12,7 @@ target triple = "x86_64-apple-macosx10.14.0" ; CHECK-NEXT: ] ; ; CHECK: codeRepl: -; CHECK-NEXT: bitcast -; CHECK-NEXT: lifetime.start +; CHECK: lifetime.start ; CHECK-NEXT: call void @pluto.cold.1(i1* %tmp8.ce.loc) ; CHECK-NEXT: %tmp8.ce.reload = load i1, i1* %tmp8.ce.loc ; CHECK-NEXT: lifetime.end From 4f0e0d92178d57137e26b1ac1be5f0409791912a Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 29 Sep 2020 16:47:21 -0700 Subject: [PATCH 101/544] [mlir] Remove more OpBuilder args which are now injected NFC. Some small changes to make things more consistent but primarily avoiding old behavior without any further change. --- mlir/docs/Tutorials/Toy/Ch-2.md | 5 +- .../mlir/Dialect/Affine/IR/AffineOps.td | 75 +++----- mlir/include/mlir/Dialect/GPU/GPUOps.td | 11 +- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 138 ++++++-------- mlir/include/mlir/Dialect/SCF/SCFOps.td | 18 +- .../include/mlir/Dialect/Shape/IR/ShapeOps.td | 11 +- .../mlir/Dialect/StandardOps/IR/Ops.td | 180 ++++++++---------- mlir/include/mlir/Dialect/Vector/VectorOps.td | 133 ++++++------- mlir/test/lib/Dialect/Test/TestOps.td | 18 +- 9 files changed, 260 insertions(+), 329 deletions(-) diff --git a/mlir/docs/Tutorials/Toy/Ch-2.md b/mlir/docs/Tutorials/Toy/Ch-2.md index cc5b380a9f62d..89134e264aa07 100644 --- a/mlir/docs/Tutorials/Toy/Ch-2.md +++ b/mlir/docs/Tutorials/Toy/Ch-2.md @@ -490,15 +490,14 @@ def ConstantOp : Toy_Op<"constant"> { // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"OpBuilder &builder, OperationState &result, " - "DenseElementsAttr value", [{ + OpBuilder<"DenseElementsAttr value", [{ // Call into an autogenerated `build` method. build(builder, result, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. This builder // creates a declaration for `ConstantOp::build` with the given parameters. - OpBuilder<"OpBuilder &builder, OperationState &result, double value"> + OpBuilder<"double value"> ]; } ``` diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index 4294b88553f44..c47dcd3d5fe2d 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -73,9 +73,8 @@ def AffineApplyOp : Affine_Op<"apply", [NoSideEffect]> { // has a constant builder. That way we wouldn't need to explicitly specify the // result types here. let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "AffineMap map, ValueRange mapOperands", [{ - build(builder, result, builder.getIndexType(), map, mapOperands); + OpBuilder<"AffineMap map, ValueRange mapOperands", [{ + build($_builder, $_state, $_builder.getIndexType(), map, mapOperands); }]> ]; @@ -214,12 +213,10 @@ def AffineForOp : Affine_Op<"for", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "int64_t lowerBound, int64_t upperBound, int64_t step = 1, " + OpBuilder<"int64_t lowerBound, int64_t upperBound, int64_t step = 1, " "ValueRange iterArgs = llvm::None, function_ref bodyBuilder = nullptr">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "ValueRange lbOperands, AffineMap lbMap, " + OpBuilder<"ValueRange lbOperands, AffineMap lbMap, " "ValueRange ubOperands, AffineMap ubMap, " "int64_t step = 1, ValueRange iterArgs = llvm::None, " "function_ref " @@ -413,10 +410,8 @@ def AffineIfOp : Affine_Op<"if", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "IntegerSet set, ValueRange args, bool withElseRegion">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "TypeRange resultTypes, IntegerSet set, ValueRange args," + OpBuilder<"IntegerSet set, ValueRange args, bool withElseRegion">, + OpBuilder<"TypeRange resultTypes, IntegerSet set, ValueRange args," "bool withElseRegion">, ]; @@ -508,14 +503,11 @@ def AffineLoadOp : AffineLoadOpBase<"load"> { let builders = [ /// Builds an affine load op with the specified map and operands. - OpBuilder<"OpBuilder &builder, OperationState &result, AffineMap map, " - "ValueRange operands">, + OpBuilder<"AffineMap map, ValueRange operands">, /// Builds an affine load op with an identity map and operands. - OpBuilder<"OpBuilder &builder, OperationState &result, Value memref, " - "ValueRange indices = {}">, + OpBuilder<"Value memref, ValueRange indices = {}">, /// Builds an affine load op with the specified map and its operands. - OpBuilder<"OpBuilder &builder, OperationState &result, Value memref, " - "AffineMap map, ValueRange mapOperands"> + OpBuilder<"Value memref, AffineMap map, ValueRange mapOperands"> ]; let extraClassDeclaration = extraClassDeclarationBase; @@ -530,10 +522,9 @@ class AffineMinMaxOpBase traits = []> : let results = (outs Index); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, AffineMap affineMap, " - "ValueRange mapOperands", + OpBuilder<"AffineMap affineMap, ValueRange mapOperands", [{ - build(builder, result, builder.getIndexType(), affineMap, mapOperands); + build($_builder, $_state, $_builder.getIndexType(), affineMap, mapOperands); }]> ]; @@ -656,17 +647,14 @@ def AffineParallelOp : Affine_Op<"parallel", let regions = (region SizedRegion<1>:$region); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "TypeRange resultTypes, " + OpBuilder<"TypeRange resultTypes, " "ArrayRef reductions, " "ArrayRef ranges">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "TypeRange resultTypes, " + OpBuilder<"TypeRange resultTypes, " "ArrayRef reductions, " "AffineMap lbMap, ValueRange lbArgs, " "AffineMap ubMap, ValueRange ubArgs">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "TypeRange resultTypes, " + OpBuilder<"TypeRange resultTypes, " "ArrayRef reductions, " "AffineMap lbMap, ValueRange lbArgs, " "AffineMap ubMap, ValueRange ubArgs, " @@ -736,21 +724,20 @@ def AffinePrefetchOp : Affine_Op<"prefetch"> { BoolAttr:$isDataCache); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value memref," - "AffineMap map, ArrayRef mapOperands, bool isWrite," + "Value memref, AffineMap map, ArrayRef mapOperands, bool isWrite," "unsigned localityHint, bool isDataCache", [{ assert(map.getNumInputs() == mapOperands.size() && "inconsistent index info"); - auto localityHintAttr = builder.getI32IntegerAttr(localityHint); - auto isWriteAttr = builder.getBoolAttr(isWrite); - auto isDataCacheAttr = builder.getBoolAttr(isDataCache); - result.addOperands(memref); - result.addAttribute(getMapAttrName(), AffineMapAttr::get(map)); - result.addOperands(mapOperands); - result.addAttribute(getLocalityHintAttrName(), localityHintAttr); - result.addAttribute(getIsWriteAttrName(), isWriteAttr); - result.addAttribute(getIsDataCacheAttrName(), isDataCacheAttr); + auto localityHintAttr = $_builder.getI32IntegerAttr(localityHint); + auto isWriteAttr = $_builder.getBoolAttr(isWrite); + auto isDataCacheAttr = $_builder.getBoolAttr(isDataCache); + $_state.addOperands(memref); + $_state.addAttribute(getMapAttrName(), AffineMapAttr::get(map)); + $_state.addOperands(mapOperands); + $_state.addAttribute(getLocalityHintAttrName(), localityHintAttr); + $_state.addAttribute(getIsWriteAttrName(), isWriteAttr); + $_state.addAttribute(getIsDataCacheAttrName(), isDataCacheAttr); }]>]; let extraClassDeclaration = [{ @@ -844,11 +831,9 @@ def AffineStoreOp : AffineStoreOpBase<"store"> { let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value valueToStore, Value memref, ValueRange indices">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value valueToStore, Value memref, AffineMap map, " - "ValueRange mapOperands"> + OpBuilder<"Value valueToStore, Value memref, ValueRange indices">, + OpBuilder<"Value valueToStore, Value memref, AffineMap map, " + "ValueRange mapOperands"> ]; let extraClassDeclaration = extraClassDeclarationBase; @@ -874,9 +859,9 @@ def AffineYieldOp : Affine_Op<"yield", [NoSideEffect, Terminator, ReturnLike]> { let arguments = (ins Variadic:$operands); - let builders = [OpBuilder< - "OpBuilder &b, OperationState &result", [{ build(b, result, llvm::None); }] - >]; + let builders = [ + OpBuilder<"", [{ build($_builder, $_state, llvm::None); }]> + ]; let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; } diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index fd43065e96937..03d0a89bbcda0 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -201,7 +201,7 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, StringRef name, " + OpBuilder<"StringRef name, " "FunctionType type, TypeRange workgroupAttributions = {}, " "TypeRange privateAttributions = {}, " "ArrayRef attrs = {}"> @@ -371,11 +371,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, GPUFuncOp kernelFunc, " + OpBuilder<"GPUFuncOp kernelFunc, " "Value gridSizeX, Value gridSizeY, Value gridSizeZ, " "Value blockSizeX, Value blockSizeY, Value blockSizeZ, " "ValueRange kernelOperands">, - OpBuilder<"OpBuilder &builder, OperationState &result, GPUFuncOp kernelFunc, " + OpBuilder<"GPUFuncOp kernelFunc, " "KernelDim3 gridSize, KernelDim3 blockSize, " "ValueRange kernelOperands"> ]; @@ -490,7 +490,7 @@ def GPU_LaunchOp : GPU_Op<"launch">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, Value gridSizeX," + OpBuilder<"Value gridSizeX," "Value gridSizeY, Value gridSizeZ, Value blockSizeX," "Value blockSizeY, Value blockSizeZ"> ]; @@ -716,8 +716,7 @@ def GPU_GPUModuleOp : GPU_Op<"module", [ ``` }]; - let builders = [OpBuilder<"OpBuilder &builder, OperationState &result, " - "StringRef name">]; + let builders = [OpBuilder<"StringRef name">]; let parser = [{ return ::parseGPUModuleOp(parser, result); }]; let printer = [{ return ::print(p, *this); }]; let regions = (region SizedRegion<1>:$body); diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 39f22855e80e3..b5b8e45eb21f2 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -23,23 +23,22 @@ class LLVM_Builder { } def LLVM_OneResultOpBuilder : OpBuilder< - "OpBuilder &, OperationState &result, Type resultType, " - "ValueRange operands, ArrayRef attributes = {}", + "Type resultType, ValueRange operands, " + "ArrayRef attributes = {}", [{ - if (resultType) result.addTypes(resultType); - result.addOperands(operands); + if (resultType) $_state.addTypes(resultType); + $_state.addOperands(operands); for (auto namedAttr : attributes) { - result.addAttribute(namedAttr.first, namedAttr.second); + $_state.addAttribute(namedAttr.first, namedAttr.second); } }]>; def LLVM_ZeroResultOpBuilder : OpBuilder< - "OpBuilder &, OperationState &result, ValueRange operands, " - "ArrayRef attributes = {}", + "ValueRange operands, ArrayRef attributes = {}", [{ - result.addOperands(operands); + $_state.addOperands(operands); for (auto namedAttr : attributes) { - result.addAttribute(namedAttr.first, namedAttr.second); + $_state.addAttribute(namedAttr.first, namedAttr.second); } }]>; @@ -56,14 +55,13 @@ class LLVM_OneResultOp traits = []> : // Compatibility builder that takes an instance of wrapped llvm::VoidType // to indicate no result. def LLVM_VoidResultTypeOpBuilder : OpBuilder< - "OpBuilder &builder, OperationState &result, Type resultType, " - "ValueRange operands, ArrayRef attributes = {}", + "Type resultType, ValueRange operands, ArrayRef attributes = {}", [{ auto llvmType = resultType.dyn_cast(); (void)llvmType; assert(llvmType && "result must be an LLVM type"); assert(llvmType.isVoidTy() && "for zero-result operands, only 'void' is accepted as result type"); - build(builder, result, operands, attributes); + build($_builder, $_state, operands, attributes); }]>; // Base class for LLVM operations with zero results. @@ -73,12 +71,12 @@ class LLVM_ZeroResultOp traits = []> : // Opaque builder used for terminator operations that contain successors. def LLVM_TerminatorPassthroughOpBuilder : OpBuilder< - "OpBuilder &, OperationState &result, ValueRange operands, " - "SuccessorRange destinations, ArrayRef attributes = {}", + "ValueRange operands, SuccessorRange destinations, " + "ArrayRef attributes = {}", [{ - result.addOperands(operands); - result.addSuccessors(destinations); - result.addAttributes(attributes); + $_state.addOperands(operands); + $_state.addSuccessors(destinations); + $_state.addAttributes(attributes); }]>; // Base class for LLVM terminator operations. All terminator operations have @@ -161,10 +159,9 @@ def LLVM_ICmpOp : LLVM_OneResultOp<"icmp", [NoSideEffect]>, $res = builder.CreateICmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, ICmpPredicate predicate, Value lhs, " - "Value rhs", [{ - build(b, result, LLVMType::getInt1Ty(lhs.getType().getContext()), - b.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); + "ICmpPredicate predicate, Value lhs, Value rhs", [{ + build($_builder, $_state, LLVMType::getInt1Ty(lhs.getType().getContext()), + $_builder.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); }]>]; let parser = [{ return parseCmpOp(parser, result); }]; let printer = [{ printICmpOp(p, *this); }]; @@ -208,10 +205,9 @@ def LLVM_FCmpOp : LLVM_OneResultOp<"fcmp", [NoSideEffect]>, $res = builder.CreateFCmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, FCmpPredicate predicate, Value lhs, " - "Value rhs", [{ - build(b, result, LLVMType::getInt1Ty(lhs.getType().getContext()), - b.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); + "FCmpPredicate predicate, Value lhs, Value rhs", [{ + build($_builder, $_state, LLVMType::getInt1Ty(lhs.getType().getContext()), + $_builder.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); }]>]; let parser = [{ return parseCmpOp(parser, result); }]; let printer = [{ printFCmpOp(p, *this); }]; @@ -264,12 +260,12 @@ def LLVM_AllocaOp : $res = inst; }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Type resultType, Value arraySize, " - "unsigned alignment", + "Type resultType, Value arraySize, unsigned alignment", [{ if (alignment == 0) - return build(b, result, resultType, arraySize, IntegerAttr()); - build(b, result, resultType, arraySize, b.getI64IntegerAttr(alignment)); + return build($_builder, $_state, resultType, arraySize, IntegerAttr()); + build($_builder, $_state, resultType, arraySize, + $_builder.getI64IntegerAttr(alignment)); }]>]; let parser = [{ return parseAllocaOp(parser, result); }]; let printer = [{ printAllocaOp(p, *this); }]; @@ -297,16 +293,14 @@ def LLVM_LoadOp : $res = inst; }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value addr, " - "unsigned alignment = 0, bool isVolatile = false, " + "Value addr, unsigned alignment = 0, bool isVolatile = false, " "bool isNonTemporal = false", [{ auto type = addr.getType().cast().getPointerElementTy(); - build(b, result, type, addr, alignment, isVolatile, isNonTemporal); + build($_builder, $_state, type, addr, alignment, isVolatile, isNonTemporal); }]>, OpBuilder< - "OpBuilder &b, OperationState &result, Type t, Value addr, " - "unsigned alignment = 0, bool isVolatile = false, " + "Type t, Value addr, unsigned alignment = 0, bool isVolatile = false, " "bool isNonTemporal = false">]; let parser = [{ return parseLoadOp(parser, result); }]; let printer = [{ printLoadOp(p, *this); }]; @@ -323,8 +317,7 @@ def LLVM_StoreOp : auto *inst = builder.CreateStore($value, $addr, $volatile_); }] # setAlignmentCode # setNonTemporalMetadataCode; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value value, Value addr, " - "unsigned alignment = 0, bool isVolatile = false, " + "Value value, Value addr, unsigned alignment = 0, bool isVolatile = false, " "bool isNonTemporal = false"> ]; let parser = [{ return parseStoreOp(parser, result); }]; @@ -383,19 +376,17 @@ def LLVM_InvokeOp : LLVM_Op<"invoke", [ AnySuccessor:$unwindDest); let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, TypeRange tys, " - "FlatSymbolRefAttr callee, ValueRange ops, Block* normal, " + "TypeRange tys, FlatSymbolRefAttr callee, ValueRange ops, Block* normal, " "ValueRange normalOps, Block* unwind, ValueRange unwindOps", [{ - result.addAttribute("callee", callee); - build(b, result, tys, ops, normal, normalOps, unwind, unwindOps); + $_state.addAttribute("callee", callee); + build($_builder, $_state, tys, ops, normal, normalOps, unwind, unwindOps); }]>, OpBuilder< - "OpBuilder &b, OperationState &result, TypeRange tys, " - "ValueRange ops, Block* normal, " + "TypeRange tys, ValueRange ops, Block* normal, " "ValueRange normalOps, Block* unwind, ValueRange unwindOps", [{ - build(b, result, tys, /*callee=*/FlatSymbolRefAttr(), ops, normalOps, + build($_builder, $_state, tys, /*callee=*/FlatSymbolRefAttr(), ops, normalOps, unwindOps, normal, unwind); }]>]; let verifier = [{ return ::verify(*this); }]; @@ -416,15 +407,15 @@ def LLVM_CallOp : LLVM_Op<"call">, Variadic)>, Results<(outs Variadic)> { let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, LLVMFuncOp func," - "ValueRange operands, ArrayRef attributes = {}", + "LLVMFuncOp func, ValueRange operands, " + "ArrayRef attributes = {}", [{ LLVMType resultType = func.getType().getFunctionResultType(); if (!resultType.isVoidTy()) - result.addTypes(resultType); - result.addAttribute("callee", builder.getSymbolRefAttr(func)); - result.addAttributes(attributes); - result.addOperands(operands); + $_state.addTypes(resultType); + $_state.addAttribute("callee", $_builder.getSymbolRefAttr(func)); + $_state.addAttributes(attributes); + $_state.addOperands(operands); }]>]; let verifier = [{ if (getNumResults() > 1) @@ -441,8 +432,7 @@ def LLVM_ExtractElementOp : LLVM_OneResultOp<"extractelement", [NoSideEffect]>, $res = builder.CreateExtractElement($vector, $position); }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value vector, Value position," - "ArrayRef attrs = {}">]; + "Value vector, Value position, ArrayRef attrs = {}">]; let parser = [{ return parseExtractElementOp(parser, result); }]; let printer = [{ printExtractElementOp(p, *this); }]; } @@ -474,10 +464,9 @@ def LLVM_InsertValueOp : LLVM_OneResultOp<"insertvalue", [NoSideEffect]>, extractPosition($position)); }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value container, Value value, " - "ArrayAttr position", + "Value container, Value value, ArrayAttr position", [{ - build(b, result, container.getType(), container, value, position); + build($_builder, $_state, container.getType(), container, value, position); }]>]; let parser = [{ return parseInsertValueOp(parser, result); }]; let printer = [{ printInsertValueOp(p, *this); }]; @@ -491,8 +480,7 @@ def LLVM_ShuffleVectorOp $res = builder.CreateShuffleVector($v1, $v2, mask); }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value v1, Value v2, " - "ArrayAttr mask, ArrayRef attrs = {}">]; + "Value v1, Value v2, ArrayAttr mask, ArrayRef attrs = {}">]; let verifier = [{ auto wrappedVectorType1 = v1().getType().cast(); auto wrappedVectorType2 = v2().getType().cast(); @@ -517,9 +505,8 @@ def LLVM_SelectOp LLVM_Builder< "$res = builder.CreateSelect($condition, $trueValue, $falseValue);"> { let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value condition, Value lhs, " - "Value rhs", [{ - build(b, result, lhs.getType(), condition, lhs, rhs); + "Value condition, Value lhs, Value rhs", [{ + build($_builder, $_state, lhs.getType(), condition, lhs, rhs); }]>]; let assemblyFormat = "operands attr-dict `:` type($condition) `,` type($res)"; } @@ -555,22 +542,21 @@ def LLVM_CondBrOp : LLVM_TerminatorOp<"cond_br", }]; let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value condition," - "Block *trueDest, ValueRange trueOperands," + "Value condition, Block *trueDest, ValueRange trueOperands," "Block *falseDest, ValueRange falseOperands," "Optional> weights = {}", [{ ElementsAttr weightsAttr; if (weights) { weightsAttr = - builder.getI32VectorAttr({static_cast(weights->first), + $_builder.getI32VectorAttr({static_cast(weights->first), static_cast(weights->second)}); } - build(builder, result, condition, trueOperands, falseOperands, weightsAttr, + build($_builder, $_state, condition, trueOperands, falseOperands, weightsAttr, trueDest, falseDest); }]>, OpBuilder< - "OpBuilder &builder, OperationState &result, Value condition," - "Block *trueDest, Block *falseDest, ValueRange falseOperands = {}", [{ - build(builder, result, condition, trueDest, ValueRange(), falseDest, + "Value condition, Block *trueDest, Block *falseDest, " + "ValueRange falseOperands = {}", [{ + build($_builder, $_state, condition, trueDest, ValueRange(), falseDest, falseOperands); }]>, LLVM_TerminatorPassthroughOpBuilder]; } @@ -660,21 +646,21 @@ def LLVM_AddressOfOp let summary = "Creates a pointer pointing to a global or a function"; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, LLVMType resType, " + OpBuilder<"LLVMType resType, " "StringRef name, ArrayRef attrs = {}", [{ - result.addAttribute("global_name", builder.getSymbolRefAttr(name)); - result.addAttributes(attrs); - result.addTypes(resType);}]>, + $_state.addAttribute("global_name",$_builder.getSymbolRefAttr(name)); + $_state.addAttributes(attrs); + $_state.addTypes(resType);}]>, - OpBuilder<"OpBuilder &builder, OperationState &result, GlobalOp global, " + OpBuilder<"GlobalOp global, " "ArrayRef attrs = {}", [{ - build(builder, result, + build($_builder, $_state, global.getType().getPointerTo(global.addr_space()), global.sym_name(), attrs);}]>, - OpBuilder<"OpBuilder &builder, OperationState &result, LLVMFuncOp func, " + OpBuilder<"LLVMFuncOp func, " "ArrayRef attrs = {}", [{ - build(builder, result, + build($_builder, $_state, func.getType().getPointerTo(), func.getName(), attrs);}]> ]; @@ -721,7 +707,7 @@ def LLVM_GlobalOp let regions = (region AnyRegion:$initializer); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, LLVMType type, " + OpBuilder<"LLVMType type, " "bool isConstant, Linkage linkage, StringRef name, " "Attribute value, unsigned addrSpace = 0, " "ArrayRef attrs = {}"> @@ -769,7 +755,7 @@ def LLVM_LLVMFuncOp let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, StringRef name, " + OpBuilder<"StringRef name, " "LLVMType type, Linkage linkage = Linkage::External, " "ArrayRef attrs = {}, " "ArrayRef argAttrs = {}"> diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index 179b4d773a3a4..1011e0d7ef683 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -135,8 +135,7 @@ def ForOp : SCF_Op<"for", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value lowerBound, Value upperBound, Value step, " + OpBuilder<"Value lowerBound, Value upperBound, Value step, " "ValueRange iterArgs = llvm::None, " "function_ref" " = nullptr"> @@ -236,10 +235,8 @@ def IfOp : SCF_Op<"if", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value cond, bool withElseRegion">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "TypeRange resultTypes, Value cond, bool withElseRegion">, + OpBuilder<"Value cond, bool withElseRegion">, + OpBuilder<"TypeRange resultTypes, Value cond, bool withElseRegion">, OpBuilder< "OpBuilder &builder, OperationState &result, TypeRange resultTypes, " "Value cond, " @@ -327,14 +324,12 @@ def ParallelOp : SCF_Op<"parallel", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "ValueRange lowerBounds, ValueRange upperBounds, " + OpBuilder<"ValueRange lowerBounds, ValueRange upperBounds, " "ValueRange steps, ValueRange initVals, " "function_ref" " bodyBuilderFn = nullptr">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "ValueRange lowerBounds, ValueRange upperBounds, " + OpBuilder<"ValueRange lowerBounds, ValueRange upperBounds, " "ValueRange steps, " "function_ref" " bodyBuilderFn = nullptr">, @@ -390,8 +385,7 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> { let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value operand, " + OpBuilder<"Value operand, " "function_ref" " bodyBuilderFn = nullptr"> ]; diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index b944b34b1d9df..235a22a2ce3d4 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -124,7 +124,7 @@ def Shape_ConstSizeOp : Shape_Op<"const_size", [ let results = (outs Shape_SizeType:$result); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, int64_t value"> + OpBuilder<"int64_t value"> ]; let assemblyFormat = "$value attr-dict"; @@ -231,7 +231,7 @@ def Shape_GetExtentOp : Shape_Op<"get_extent", [NoSideEffect]> { let builders = [ // Builder that allows passing a constant dimension as a simple integer. - OpBuilder<"OpBuilder &builder, OperationState &result, Value shape, " + OpBuilder<"Value shape, " "int64_t dim"> ]; @@ -332,7 +332,7 @@ def Shape_NumElementsOp : Shape_Op<"num_elements", [NoSideEffect]> { let results = (outs Shape_SizeOrIndexType:$result); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, Value shape">, + OpBuilder<"Value shape">, ]; let assemblyFormat = "$shape `:` type($shape) `->` type($result) attr-dict"; @@ -383,8 +383,7 @@ def Shape_ReduceOp : Shape_Op<"reduce", let regions = (region SizedRegion<1>:$region); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value shape, ValueRange initVals">, + OpBuilder<"Value shape, ValueRange initVals">, ]; let verifier = [{ return ::verify(*this); }]; @@ -406,7 +405,7 @@ def Shape_ShapeOfOp : Shape_Op<"shape_of", [NoSideEffect]> { let assemblyFormat = "$arg `:` type($arg) `->` type($result) attr-dict"; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, Value arg"> + OpBuilder<"Value arg"> ]; let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }]; diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 649e941050a37..43d47941d0ab4 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -50,8 +50,8 @@ class CastOp traits = []> : let results = (outs AnyType); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, Type destType", [{ - impl::buildCastOp(builder, result, source, destType); + "Value source, Type destType", [{ + impl::buildCastOp($_builder, $_state, source, destType); }]>]; let parser = [{ @@ -162,16 +162,16 @@ class AllocLikeOp]>); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, MemRefType memrefType", [{ - result.types.push_back(memrefType); + "MemRefType memrefType", [{ + $_state.types.push_back(memrefType); }]>, OpBuilder< - "OpBuilder &builder, OperationState &result, MemRefType memrefType, " # - "ValueRange operands, IntegerAttr alignment = IntegerAttr()", [{ - result.addOperands(operands); - result.types.push_back(memrefType); + "MemRefType memrefType, ValueRange operands, " + "IntegerAttr alignment = IntegerAttr()", [{ + $_state.addOperands(operands); + $_state.types.push_back(memrefType); if (alignment) - result.addAttribute(getAlignmentAttrName(), alignment); + $_state.addAttribute(getAlignmentAttrName(), alignment); }]>]; let extraClassDeclaration = [{ @@ -612,8 +612,7 @@ def GenericAtomicRMWOp : Std_Op<"generic_atomic_rmw", [ let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value memref, ValueRange ivs"> + OpBuilder<"Value memref, ValueRange ivs"> ]; let extraClassDeclaration = [{ @@ -668,10 +667,9 @@ def BranchOp : Std_Op<"br", let arguments = (ins Variadic:$destOperands); let successors = (successor AnySuccessor:$dest); - let builders = [OpBuilder<"OpBuilder &, OperationState &result, Block *dest, " - "ValueRange destOperands = {}", [{ - result.addSuccessors(dest); - result.addOperands(destOperands); + let builders = [OpBuilder<"Block *dest, ValueRange destOperands = {}", [{ + $_state.addSuccessors(dest); + $_state.addOperands(destOperands); }]>]; // BranchOp is fully verified by traits. @@ -714,21 +712,18 @@ def CallOp : Std_Op<"call", [CallOpInterface, MemRefsNormalizable]> { let results = (outs Variadic); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, FuncOp callee," - "ValueRange operands = {}", [{ - result.addOperands(operands); - result.addAttribute("callee", builder.getSymbolRefAttr(callee)); - result.addTypes(callee.getType().getResults()); + "FuncOp callee, ValueRange operands = {}", [{ + $_state.addOperands(operands); + $_state.addAttribute("callee",$_builder.getSymbolRefAttr(callee)); + $_state.addTypes(callee.getType().getResults()); }]>, OpBuilder< - "OpBuilder &builder, OperationState &result, SymbolRefAttr callee," - "TypeRange results, ValueRange operands = {}", [{ - result.addOperands(operands); - result.addAttribute("callee", callee); - result.addTypes(results); + "SymbolRefAttr callee, TypeRange results, ValueRange operands = {}", [{ + $_state.addOperands(operands); + $_state.addAttribute("callee", callee); + $_state.addTypes(results); }]>, OpBuilder< - "OpBuilder &builder, OperationState &result, StringRef callee," - "TypeRange results, ValueRange operands = {}", [{ - build(builder, result, builder.getSymbolRefAttr(callee), results, + "StringRef callee, TypeRange results, ValueRange operands = {}", [{ + build($_builder, $_state, $_builder.getSymbolRefAttr(callee), results, operands); }]>]; @@ -790,11 +785,10 @@ def CallIndirectOp : Std_Op<"call_indirect", [ let results = (outs Variadic:$results); let builders = [OpBuilder< - "OpBuilder &, OperationState &result, Value callee," - "ValueRange operands = {}", [{ - result.operands.push_back(callee); - result.addOperands(operands); - result.addTypes(callee.getType().cast().getResults()); + "Value callee, ValueRange operands = {}", [{ + $_state.operands.push_back(callee); + $_state.addOperands(operands); + $_state.addTypes(callee.getType().cast().getResults()); }]>]; let extraClassDeclaration = [{ @@ -956,9 +950,8 @@ def CmpFOp : Std_Op<"cmpf", let results = (outs BoolLike:$result); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, CmpFPredicate predicate," - "Value lhs, Value rhs", [{ - ::buildCmpFOp(builder, result, predicate, lhs, rhs); + "CmpFPredicate predicate, Value lhs, Value rhs", [{ + ::buildCmpFOp($_builder, $_state, predicate, lhs, rhs); }]>]; let extraClassDeclaration = [{ @@ -1078,9 +1071,8 @@ def CmpIOp : Std_Op<"cmpi", let results = (outs BoolLike:$result); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, CmpIPredicate predicate," - "Value lhs, Value rhs", [{ - ::buildCmpIOp(builder, result, predicate, lhs, rhs); + "CmpIPredicate predicate, Value lhs, Value rhs", [{ + ::buildCmpIOp($_builder, $_state, predicate, lhs, rhs); }]>]; let extraClassDeclaration = [{ @@ -1176,15 +1168,14 @@ def CondBranchOp : Std_Op<"cond_br", let successors = (successor AnySuccessor:$trueDest, AnySuccessor:$falseDest); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value condition," - "Block *trueDest, ValueRange trueOperands," + "Value condition, Block *trueDest, ValueRange trueOperands," "Block *falseDest, ValueRange falseOperands", [{ - build(builder, result, condition, trueOperands, falseOperands, trueDest, + build($_builder, $_state, condition, trueOperands, falseOperands, trueDest, falseDest); }]>, OpBuilder< - "OpBuilder &builder, OperationState &result, Value condition," - "Block *trueDest, Block *falseDest, ValueRange falseOperands = {}", [{ - build(builder, result, condition, trueDest, ValueRange(), falseDest, + "Value condition, Block *trueDest, Block *falseDest, " + "ValueRange falseOperands = {}", [{ + build($_builder, $_state, condition, trueDest, ValueRange(), falseDest, falseOperands); }]>]; @@ -1309,9 +1300,8 @@ def ConstantOp : Std_Op<"constant", let arguments = (ins AnyAttr:$value); let results = (outs AnyType); - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Attribute value", - [{ build(builder, result, value.getType(), value); }]>]; + let builders = [OpBuilder<"Attribute value", + [{ build($_builder, $_state, value.getType(), value); }]>]; let extraClassDeclaration = [{ Attribute getValue() { return getAttr("value"); } @@ -1493,10 +1483,8 @@ def DimOp : Std_Op<"dim", [NoSideEffect]> { }]; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value memrefOrTensor, int64_t index">, - OpBuilder<"OpBuilder &builder, OperationState &result, " - "Value memrefOrTensor, Value index"> + OpBuilder<"Value memrefOrTensor, int64_t index">, + OpBuilder<"Value memrefOrTensor, Value index"> ]; let extraClassDeclaration = [{ @@ -1547,8 +1535,7 @@ def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements", let builders = [ // Build op and populate its body per callback function. - OpBuilder<"OpBuilder &b, OperationState &result, Type resultTy, " - "ValueRange dynamicExtents, " + OpBuilder<"Type resultTy, ValueRange dynamicExtents, " "function_ref">, ]; @@ -1627,11 +1614,10 @@ def ExtractElementOp : Std_Op<"extract_element", let results = (outs AnyType:$result); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value aggregate," - "ValueRange indices = {}", [{ + "Value aggregate, ValueRange indices = {}", [{ auto resType = aggregate.getType().cast() .getElementType(); - build(builder, result, resType, aggregate, indices); + build($_builder, $_state, resType, aggregate, indices); }]>]; let extraClassDeclaration = [{ @@ -1681,10 +1667,9 @@ def TensorFromElementsOp : Std_Op<"tensor_from_elements", [ let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"OpBuilder &b, OperationState &result, Type elementType," - "ValueRange elements">, + OpBuilder<"Type elementType, ValueRange elements">, // Special case builder for when `elements` has size >=1. - OpBuilder<"OpBuilder &b, OperationState &result, ValueRange elements"> + OpBuilder<"ValueRange elements"> ]; let hasCanonicalizer = 1; @@ -1877,12 +1862,11 @@ def LoadOp : Std_Op<"load", let results = (outs AnyType:$result); let builders = [OpBuilder< - "OpBuilder &, OperationState &result, Value memref," - "ValueRange indices = {}", [{ + "Value memref, ValueRange indices = {}", [{ auto memrefType = memref.getType().cast(); - result.addOperands(memref); - result.addOperands(indices); - result.types.push_back(memrefType.getElementType()); + $_state.addOperands(memref); + $_state.addOperands(indices); + $_state.types.push_back(memrefType.getElementType()); }]>]; let extraClassDeclaration = [{ @@ -2169,9 +2153,9 @@ def RankOp : Std_Op<"rank", [NoSideEffect]> { let verifier = ?; let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value tensor", [{ - auto indexType = builder.getIndexType(); - build(builder, result, indexType, tensor); + "Value tensor", [{ + auto indexType = $_builder.getIndexType(); + build($_builder, $_state, indexType, tensor); }]>]; let hasFolder = 1; @@ -2241,9 +2225,7 @@ def ReturnOp : Std_Op<"return", [NoSideEffect, HasParent<"FuncOp">, let arguments = (ins Variadic:$operands); - let builders = [OpBuilder< - "OpBuilder &b, OperationState &result", [{ build(b, result, llvm::None); }] - >]; + let builders = [OpBuilder<"", [{ build($_builder, $_state, llvm::None); }]>]; let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; } @@ -2307,10 +2289,9 @@ def SelectOp : Std_Op<"select", [NoSideEffect, let results = (outs AnyType:$result); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value condition," - "Value trueValue, Value falseValue", [{ - result.addOperands({condition, trueValue, falseValue}); - result.addTypes(trueValue.getType()); + "Value condition, Value trueValue, Value falseValue", [{ + $_state.addOperands({condition, trueValue, falseValue}); + $_state.addTypes(trueValue.getType()); }]>]; let extraClassDeclaration = [{ @@ -2465,10 +2446,9 @@ def SignExtendIOp : Std_Op<"sexti", let arguments = (ins SignlessIntegerLike:$value); let results = (outs SignlessIntegerLike); - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value value, Type destType", [{ - result.addOperands(value); - result.addTypes(destType); + let builders = [OpBuilder<"Value value, Type destType", [{ + $_state.addOperands(value); + $_state.addTypes(destType); }]>]; let parser = [{ @@ -2540,9 +2520,8 @@ def SplatOp : Std_Op<"splat", [NoSideEffect, let results = (outs AnyTypeOf<[AnyVector, AnyStaticShapeTensor]>:$aggregate); let builders = - [OpBuilder<"OpBuilder &builder, OperationState &result, Value element, " - "Type aggregateType", - [{ build(builder, result, aggregateType, element); }]>]; + [OpBuilder<"Value element, Type aggregateType", + [{ build($_builder, $_state, aggregateType, element); }]>]; let hasFolder = 1; @@ -2619,9 +2598,9 @@ def StoreOp : Std_Op<"store", Variadic:$indices); let builders = [OpBuilder< - "OpBuilder &, OperationState &result, Value valueToStore, Value memref", [{ - result.addOperands(valueToStore); - result.addOperands(memref); + "Value valueToStore, Value memref", [{ + $_state.addOperands(valueToStore); + $_state.addOperands(memref); }]>]; let extraClassDeclaration = [{ @@ -2816,14 +2795,13 @@ def SubViewOp : Std_Op<"subview", [ let builders = [ // Build a SubViewOp with mixed static and dynamic entries. OpBuilder< - "OpBuilder &b, OperationState &result, Value source, " - "ArrayRef staticOffsets, ArrayRef staticSizes," - "ArrayRef staticStrides, ValueRange offsets, ValueRange sizes, " - "ValueRange strides, ArrayRef attrs = {}">, + "Value source, ArrayRef staticOffsets, " + "ArrayRef staticSizes, ArrayRef staticStrides, " + "ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}">, // Build a SubViewOp with all dynamic entries. OpBuilder< - "OpBuilder &b, OperationState &result, Value source, " - "ValueRange offsets, ValueRange sizes, ValueRange strides, " + "Value source, ValueRange offsets, ValueRange sizes, ValueRange strides, " "ArrayRef attrs = {}"> ]; @@ -3073,14 +3051,14 @@ def TensorLoadOp : Std_Op<"tensor_load", let verifier = ?; let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value memref", [{ - result.addOperands(memref); - result.addTypes(getTensorTypeFromMemRefType(memref.getType())); + "Value memref", [{ + $_state.addOperands(memref); + $_state.addTypes(getTensorTypeFromMemRefType(memref.getType())); }]>]; let extraClassDeclaration = [{ /// The result of a tensor_load is always a tensor. - TensorType getType() { + TensorType getType() { Type resultType = getResult().getType(); if (resultType.isa()) return resultType.cast(); @@ -3150,9 +3128,9 @@ def TruncateIOp : Std_Op<"trunci", [NoSideEffect, SameOperandsAndResultShape]> { let results = (outs SignlessIntegerLike); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value value, Type destType", [{ - result.addOperands(value); - result.addTypes(destType); + "Value value, Type destType", [{ + $_state.addOperands(value); + $_state.addTypes(destType); }]>]; let parser = [{ @@ -3418,9 +3396,9 @@ def ZeroExtendIOp : Std_Op<"zexti", [NoSideEffect, SameOperandsAndResultShape]> let results = (outs SignlessIntegerLike); let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value value, Type destType", [{ - result.addOperands(value); - result.addTypes(destType); + "Value value, Type destType", [{ + $_state.addOperands(value); + $_state.addTypes(destType); }]>]; let parser = [{ diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 2aaec4475ccb5..ecac0a3d4b1f3 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -148,13 +148,13 @@ def Vector_ContractionOp : : vector<10xf16>, vector<10xf16> into f32 ``` }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " - "Value acc, ArrayAttr indexingMaps, ArrayAttr iteratorTypes">, - OpBuilder< - "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " - "Value acc, ArrayRef> indexingExprs, " - "ArrayRef iteratorTypes">]; + let builders = [ + OpBuilder<"Value lhs, Value rhs, Value acc, ArrayAttr indexingMaps, " + "ArrayAttr iteratorTypes">, + OpBuilder<"Value lhs, Value rhs, Value acc, " + "ArrayRef> indexingExprs, " + "ArrayRef iteratorTypes"> + ]; let extraClassDeclaration = [{ VectorType getLhsType() { return lhs().getType().cast(); @@ -311,8 +311,9 @@ def Vector_ShuffleOp : : vector<2xf32>, vector<2xf32> ; yields vector<4xf32> ``` }]; - let builders = [OpBuilder<"OpBuilder &builder, OperationState &result," - "Value v1, Value v2, ArrayRef">]; + let builders = [ + OpBuilder<"Value v1, Value v2, ArrayRef"> + ]; let extraClassDeclaration = [{ static StringRef getMaskAttrName() { return "mask"; } VectorType getV1VectorType() { @@ -353,12 +354,10 @@ def Vector_ExtractElementOp : $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector) }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " - "int64_t position">, - OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " - "Value position">]; + let builders = [ + OpBuilder<"Value source, int64_t position">, + OpBuilder<"Value source, Value position"> + ]; let extraClassDeclaration = [{ VectorType getVectorType() { return vector().getType().cast(); @@ -384,13 +383,12 @@ def Vector_ExtractOp : %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32> ``` }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source," - "ArrayRef position">, + let builders = [ + OpBuilder<"Value source, ArrayRef position">, // Convenience builder which assumes the values in `position` are defined by // ConstantIndexOp. - OpBuilder<"OpBuilder &builder, OperationState &result, Value source," - "ValueRange position">]; + OpBuilder<"Value source, ValueRange position"> + ]; let extraClassDeclaration = [{ static StringRef getPositionAttrName() { return "position"; } VectorType getVectorType() { @@ -435,10 +433,10 @@ def Vector_ExtractSlicesOp : vector<2x2xf32>, vector<2x1xf32>> ``` }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, TupleType tupleType, " # - "Value vector, ArrayRef sizes, " # - "ArrayRef strides">]; + let builders = [ + OpBuilder<"TupleType tupleType, Value vector, ArrayRef sizes, " + "ArrayRef strides"> + ]; let extraClassDeclaration = [{ VectorType getSourceVectorType() { return vector().getType().cast(); @@ -481,9 +479,10 @@ def Vector_FMAOp : // Fully specified by traits. let verifier = ?; let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)"; - let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value lhs, Value rhs, Value acc", - "build(b, result, lhs.getType(), lhs, rhs, acc);">]; + let builders = [ + OpBuilder<"Value lhs, Value rhs, Value acc", + "build($_builder, $_state, lhs.getType(), lhs, rhs, acc);"> + ]; let extraClassDeclaration = [{ VectorType getVectorType() { return lhs().getType().cast(); } }]; @@ -520,12 +519,10 @@ def Vector_InsertElementOp : type($result) }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " - "Value dest, int64_t position">, - OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " - "Value dest, Value position">]; + let builders = [ + OpBuilder<"Value source, Value dest, int64_t position">, + OpBuilder<"Value source, Value dest, Value position"> + ]; let extraClassDeclaration = [{ Type getSourceType() { return source().getType(); } VectorType getDestVectorType() { @@ -559,13 +556,11 @@ def Vector_InsertOp : $source `,` $dest $position attr-dict `:` type($source) `into` type($dest) }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " - "Value dest, ArrayRef position">, + let builders = [ + OpBuilder<"Value source, Value dest, ArrayRef position">, // Convenience builder which assumes all values are constant indices. - OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " - "Value dest, ValueRange position">]; + OpBuilder<"Value source, Value dest, ValueRange position"> + ]; let extraClassDeclaration = [{ static StringRef getPositionAttrName() { return "position"; } Type getSourceType() { return source().getType(); } @@ -666,9 +661,10 @@ def Vector_InsertStridedSliceOp : $source `,` $dest attr-dict `:` type($source) `into` type($dest) }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, Value dest, " # - "ArrayRef offsets, ArrayRef strides">]; + let builders = [ + OpBuilder<"Value source, Value dest, ArrayRef offsets, " + "ArrayRef strides"> + ]; let extraClassDeclaration = [{ static StringRef getOffsetsAttrName() { return "offsets"; } static StringRef getStridesAttrName() { return "strides"; } @@ -730,9 +726,8 @@ def Vector_OuterProductOp : }]; let builders = [ // Build an op without mask, use the type of `acc` as the return type. - OpBuilder< - "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " - "Value acc">]; + OpBuilder<"Value lhs, Value rhs, Value acc"> + ]; let extraClassDeclaration = [{ VectorType getOperandVectorTypeLHS() { return lhs().getType().cast(); @@ -904,10 +899,10 @@ def Vector_ExtractStridedSliceOp : vector<4x8x16xf32> to vector<2x4x16xf32> ``` }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source, " # - "ArrayRef offsets, ArrayRef sizes, " # - "ArrayRef strides">]; + let builders = [ + OpBuilder<"Value source, ArrayRef offsets, " + "ArrayRef sizes, ArrayRef strides"> + ]; let extraClassDeclaration = [{ static StringRef getOffsetsAttrName() { return "offsets"; } static StringRef getSizesAttrName() { return "sizes"; } @@ -1069,13 +1064,11 @@ def Vector_TransferReadOp : let builders = [ // Builder that sets padding to zero. - OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " - "Value memref, ValueRange indices, AffineMap permutationMap, " - "ArrayRef maybeMasked = {}">, + OpBuilder<"VectorType vector, Value memref, ValueRange indices, " + "AffineMap permutationMap, ArrayRef maybeMasked = {}">, // Builder that sets permutation map (resp. padding) to // 'getMinorIdentityMap' (resp. zero). - OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " - "Value memref, ValueRange indices, " + OpBuilder<"VectorType vector, Value memref, ValueRange indices, " "ArrayRef maybeMasked = {}"> ]; @@ -1154,11 +1147,10 @@ def Vector_TransferWriteOp : let builders = [ // Builder that sets permutation map to 'getMinorIdentityMap'. - OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " - "Value memref, ValueRange indices, " + OpBuilder<"Value vector, Value memref, ValueRange indices, " "ArrayRef maybeMasked = {}">, - OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " - "Value memref, ValueRange indices, AffineMap permutationMap">, + OpBuilder<"Value vector, Value memref, ValueRange indices, " + "AffineMap permutationMap">, ]; let hasFolder = 1; @@ -1602,8 +1594,9 @@ def Vector_TypeCastOp : /// Build the canonical memRefType with a single vector. /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref>. - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value source">]; + let builders = [ + OpBuilder<"Value source"> + ]; let extraClassDeclaration = [{ MemRefType getMemRefType() { @@ -1756,9 +1749,9 @@ def Vector_TransposeOp : [c, f] ] ``` }]; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, Value vector, " - "ArrayRef transp">]; + let builders = [ + OpBuilder<"Value vector, ArrayRef transp"> + ]; let extraClassDeclaration = [{ VectorType getVectorType() { return vector().getType().cast(); @@ -1902,14 +1895,14 @@ def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect, ``` }]; let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " - "unsigned lhsRows, unsigned lhsColumns, unsigned rhsColumns", + OpBuilder<"Value lhs, Value rhs, unsigned lhsRows, unsigned lhsColumns, " + "unsigned rhsColumns", [{ - result.addOperands({lhs, rhs}); - result.addAttribute("lhs_rows", builder.getI32IntegerAttr(lhsRows)); - result.addAttribute("lhs_columns", builder.getI32IntegerAttr(lhsColumns)); - result.addAttribute("rhs_columns", builder.getI32IntegerAttr(rhsColumns)); - result.addTypes(VectorType::get(lhsRows * rhsColumns, + $_state.addOperands({lhs, rhs}); + $_state.addAttribute("lhs_rows",$_builder.getI32IntegerAttr(lhsRows)); + $_state.addAttribute("lhs_columns",$_builder.getI32IntegerAttr(lhsColumns)); + $_state.addAttribute("rhs_columns",$_builder.getI32IntegerAttr(rhsColumns)); + $_state.addTypes(VectorType::get(lhsRows * rhsColumns, lhs.getType().cast().getElementType())); }]>, ]; diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 3743e39d05664..6f3c8f5aee680 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -636,8 +636,7 @@ def OpFuncRef : TEST_Op<"op_funcref"> { let description = [{ The "test.op_funcref" is a test op with a reference to a function symbol. }]; - let builders = [OpBuilder<[{OpBuilder &builder, OperationState &state, - FuncOp function}]>]; + let builders = [OpBuilder<[{FuncOp function}]>]; } // Pattern add the argument plus a increasing static number hidden in @@ -1132,12 +1131,12 @@ def LegalOpB : TEST_Op<"legal_op_b">, Results<(outs I32)>; def IllegalOpTerminator : TEST_Op<"illegal_op_terminator", [Terminator]>; def IllegalOpWithRegion : TEST_Op<"illegal_op_with_region"> { let skipDefaultBuilders = 1; - let builders = [OpBuilder<"OpBuilder &builder, OperationState &state", - [{ Region *bodyRegion = state.addRegion(); - OpBuilder::InsertionGuard g(builder); - Block *body = builder.createBlock(bodyRegion); - builder.setInsertionPointToEnd(body); - builder.create(state.location); + let builders = [OpBuilder<"", + [{ Region *bodyRegion = $_state.addRegion(); + OpBuilder::InsertionGuard g($_builder); + Block *body = $_builder.createBlock(bodyRegion); + $_builder.setInsertionPointToEnd(body); + $_builder.create($_state.location); }]>]; } def IllegalOpWithRegionAnchor : TEST_Op<"illegal_op_with_region_anchor">; @@ -1176,8 +1175,7 @@ def TestRegionBuilderOp : TEST_Op<"region_builder">; def TestReturnOp : TEST_Op<"return", [ReturnLike, Terminator]> { let arguments = (ins Variadic); let builders = [ - OpBuilder<"OpBuilder &builder, OperationState &state", - [{ build(builder, state, {}); }]> + OpBuilder<"", [{ build($_builder, $_state, {}); }]> ]; } def TestCastOp : TEST_Op<"cast">, From 984744a1314ce165378e7945bc45995302a8cb80 Mon Sep 17 00:00:00 2001 From: John McCall Date: Tue, 29 Sep 2020 18:47:37 -0400 Subject: [PATCH 102/544] Fix a variety of minor issues with ObjC method mangling: - Fix a memory leak accidentally introduced yesterday by using CodeGen's existing mangling context instead of creating a new context afresh. - Move GNU-runtime ObjC method mangling into the AST mangler; this will eventually be necessary to support direct methods there, but is also just the right architecture. - Make the Apple-runtime method mangling work properly when given an interface declaration, fixing a bug (which had solidified into a test) where mangling a category method from the interface could cause it to be mangled as if the category name was a class name. (Category names are namespaced within their class and have no global meaning.) - Fix a code cross-reference in dsymutil. Based on a patch by Ellis Hoag. --- clang/include/clang/AST/DeclObjC.h | 7 ++++++ clang/lib/AST/DeclObjC.cpp | 8 +++++++ clang/lib/AST/Mangle.cpp | 34 ++++++++++++++++++++++++++++- clang/lib/CodeGen/CGObjCGNU.cpp | 24 ++------------------ clang/lib/CodeGen/CGObjCMac.cpp | 20 +++++------------ clang/lib/CodeGen/CGObjCRuntime.cpp | 10 +++++++++ clang/lib/CodeGen/CGObjCRuntime.h | 3 +++ clang/test/AST/ast-dump-decl-json.m | 2 +- llvm/tools/dsymutil/SymbolMap.cpp | 2 +- 9 files changed, 71 insertions(+), 39 deletions(-) diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h index 5613ed8370c03..32e69d7fe1ed1 100644 --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -320,6 +320,13 @@ class ObjCMethodDecl : public NamedDecl, public DeclContext { return const_cast(this)->getClassInterface(); } + /// If this method is declared or implemented in a category, return + /// that category. + ObjCCategoryDecl *getCategory(); + const ObjCCategoryDecl *getCategory() const { + return const_cast(this)->getCategory(); + } + Selector getSelector() const { return getDeclName().getObjCSelector(); } QualType getReturnType() const { return MethodDeclType; } diff --git a/clang/lib/AST/DeclObjC.cpp b/clang/lib/AST/DeclObjC.cpp index 5c8b34731f363..78ef9a1c67c9e 100644 --- a/clang/lib/AST/DeclObjC.cpp +++ b/clang/lib/AST/DeclObjC.cpp @@ -1165,6 +1165,14 @@ ObjCInterfaceDecl *ObjCMethodDecl::getClassInterface() { llvm_unreachable("unknown method context"); } +ObjCCategoryDecl *ObjCMethodDecl::getCategory() { + if (auto *CD = dyn_cast(getDeclContext())) + return CD; + if (auto *IMD = dyn_cast(getDeclContext())) + return IMD->getCategoryDecl(); + return nullptr; +} + SourceRange ObjCMethodDecl::getReturnTypeSourceRange() const { const auto *TSI = getReturnTypeSourceInfo(); if (TSI) diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index a67f57688e304..3282fcbd584f3 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -308,12 +308,44 @@ void MangleContext::mangleObjCMethodName(const ObjCMethodDecl *MD, raw_ostream &OS, bool includePrefixByte, bool includeCategoryNamespace) { + if (getASTContext().getLangOpts().ObjCRuntime.isGNUFamily()) { + // This is the mangling we've always used on the GNU runtimes, but it + // has obvious collisions in the face of underscores within class + // names, category names, and selectors; maybe we should improve it. + + OS << (MD->isClassMethod() ? "_c_" : "_i_") + << MD->getClassInterface()->getName() << '_'; + + if (includeCategoryNamespace) { + if (auto category = MD->getCategory()) + OS << category->getName(); + } + OS << '_'; + + auto selector = MD->getSelector(); + for (unsigned slotIndex = 0, + numArgs = selector.getNumArgs(), + slotEnd = std::max(numArgs, 1U); + slotIndex != slotEnd; ++slotIndex) { + if (auto name = selector.getIdentifierInfoForSlot(slotIndex)) + OS << name->getName(); + + // Replace all the positions that would've been ':' with '_'. + // That's after each slot except that a unary selector doesn't + // end in ':'. + if (numArgs) + OS << '_'; + } + + return; + } + // \01+[ContainerName(CategoryName) SelectorName] if (includePrefixByte) { OS << '\01'; } OS << (MD->isInstanceMethod() ? '-' : '+') << '['; - if (const auto *CID = dyn_cast(MD->getDeclContext())) { + if (const auto *CID = MD->getCategory()) { OS << CID->getClassInterface()->getName(); if (includeCategoryNamespace) { OS << '(' << *CID << ')'; diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index bb9c494ae68ee..ed36e4a5cbc1a 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -42,16 +42,6 @@ using namespace CodeGen; namespace { -std::string SymbolNameForMethod( StringRef ClassName, - StringRef CategoryName, const Selector MethodName, - bool isClassMethod) { - std::string MethodNameColonStripped = MethodName.getAsString(); - std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(), - ':', '_'); - return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" + - CategoryName + "_" + MethodNameColonStripped).str(); -} - /// Class that lazily initialises the runtime function. Avoids inserting the /// types and the function declaration into a module if they're not used, and /// avoids constructing the type more than once if it's used more than once. @@ -2823,9 +2813,7 @@ GenerateMethodList(StringRef ClassName, ASTContext &Context = CGM.getContext(); for (const auto *OMD : Methods) { llvm::Constant *FnPtr = - TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName, - OMD->getSelector(), - isClassMethodList)); + TheModule.getFunction(getSymbolNameForMethod(OMD)); assert(FnPtr && "Can't generate metadata for method that doesn't exist"); auto Method = MethodArray.beginStruct(ObjCMethodTy); if (isV2ABI) { @@ -3873,18 +3861,10 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::Function *CGObjCGNU::GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) { - const ObjCCategoryImplDecl *OCD = - dyn_cast(OMD->getDeclContext()); - StringRef CategoryName = OCD ? OCD->getName() : ""; - StringRef ClassName = CD->getName(); - Selector MethodName = OMD->getSelector(); - bool isClassMethod = !OMD->isInstanceMethod(); - CodeGenTypes &Types = CGM.getTypes(); llvm::FunctionType *MethodTy = Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); - std::string FunctionName = SymbolNameForMethod(ClassName, CategoryName, - MethodName, isClassMethod); + std::string FunctionName = getSymbolNameForMethod(OMD); llvm::Function *Method = llvm::Function::Create(MethodTy, diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index e2f4cabce2784..aa50d2173a7de 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1079,8 +1079,8 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime { void EmitImageInfo(); public: - CGObjCCommonMac(CodeGen::CodeGenModule &cgm) : - CGObjCRuntime(cgm), VMContext(cgm.getLLVMContext()) { } + CGObjCCommonMac(CodeGen::CodeGenModule &cgm) + : CGObjCRuntime(cgm), VMContext(cgm.getLLVMContext()) {} bool isNonFragileABI() const { return ObjCABI == 2; @@ -4001,18 +4001,14 @@ llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD, if (OMD->isDirectMethod()) { Method = GenerateDirectMethod(OMD, CD); } else { - SmallString<256> Name; - llvm::raw_svector_ostream OS(Name); - const auto &MC = CGM.getContext().createMangleContext(); - MC->mangleObjCMethodName(OMD, OS, /*includePrefixByte=*/true, - /*includeCategoryNamespace=*/true); + auto Name = getSymbolNameForMethod(OMD); CodeGenTypes &Types = CGM.getTypes(); llvm::FunctionType *MethodTy = Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); Method = llvm::Function::Create(MethodTy, llvm::GlobalValue::InternalLinkage, - Name.str(), &CGM.getModule()); + Name, &CGM.getModule()); } MethodDefinitions.insert(std::make_pair(OMD, Method)); @@ -4057,14 +4053,10 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, // Replace the cached function in the map. I->second = Fn; } else { - SmallString<256> Name; - llvm::raw_svector_ostream OS(Name); - const auto &MC = CGM.getContext().createMangleContext(); - MC->mangleObjCMethodName(OMD, OS, /*includePrefixByte=*/true, - /*includeCategoryNamespace=*/false); + auto Name = getSymbolNameForMethod(OMD, /*include category*/ false); Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, - Name.str(), &CGM.getModule()); + Name, &CGM.getModule()); DirectMethodDefinitions.insert(std::make_pair(COMD, Fn)); } diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp index 39efe040302d3..9bf4d83f9bc45 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -390,3 +390,13 @@ clang::CodeGen::emitObjCProtocolObject(CodeGenModule &CGM, const ObjCProtocolDecl *protocol) { return CGM.getObjCRuntime().GetOrEmitProtocol(protocol); } + +std::string CGObjCRuntime::getSymbolNameForMethod(const ObjCMethodDecl *OMD, + bool includeCategoryName) { + std::string buffer; + llvm::raw_string_ostream out(buffer); + CGM.getCXXABI().getMangleContext().mangleObjCMethodName(OMD, out, + /*includePrefixByte=*/true, + includeCategoryName); + return buffer; +} diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h index a2c189585f7bc..60f98389067e1 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.h +++ b/clang/lib/CodeGen/CGObjCRuntime.h @@ -115,6 +115,9 @@ class CGObjCRuntime { public: virtual ~CGObjCRuntime(); + std::string getSymbolNameForMethod(const ObjCMethodDecl *method, + bool includeCategoryName = true); + /// Generate the function required to register all Objective-C components in /// this compilation unit with the runtime library. virtual llvm::Function *ModuleInitFunction() = 0; diff --git a/clang/test/AST/ast-dump-decl-json.m b/clang/test/AST/ast-dump-decl-json.m index d100811c1c246..be730039f9bf7 100644 --- a/clang/test/AST/ast-dump-decl-json.m +++ b/clang/test/AST/ast-dump-decl-json.m @@ -836,7 +836,7 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "bar", -// CHECK-NEXT: "mangledName": "-[TestObjCCategoryDecl bar]", +// CHECK-NEXT: "mangledName": "-[TestObjCClass(TestObjCCategoryDecl) bar]", // CHECK-NEXT: "returnType": { // CHECK-NEXT: "qualType": "void" // CHECK-NEXT: }, diff --git a/llvm/tools/dsymutil/SymbolMap.cpp b/llvm/tools/dsymutil/SymbolMap.cpp index abf7557ca61e7..07a54795a8418 100644 --- a/llvm/tools/dsymutil/SymbolMap.cpp +++ b/llvm/tools/dsymutil/SymbolMap.cpp @@ -47,7 +47,7 @@ StringRef SymbolMapTranslator::operator()(StringRef Input) { return Translation; // Objective-C symbols for the MachO symbol table start with a \1. Please see - // `CGObjCCommonMac::GetNameForMethod` in clang. + // `MangleContext::mangleObjCMethodName` in clang. if (Translation[0] == 1) return StringRef(Translation).drop_front(); From 5c4fc581d5fe8427f03ec90b0d745453398aa3ad Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Tue, 29 Sep 2020 16:19:08 -0700 Subject: [PATCH 103/544] [DebugInfo] Add types from constructor homing to the retained types list. Add class types to the retained types list to make sure they don't get dropped if the constructor is optimized out later. Differential Revision: https://reviews.llvm.org/D88522 --- clang/lib/CodeGen/CGDebugInfo.cpp | 2 +- clang/test/CodeGenCXX/debug-info-limited-ctor.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 27c584ff0795a..88aace8b85dd1 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1726,7 +1726,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // info is emitted. if (DebugKind == codegenoptions::DebugInfoConstructor) if (const CXXConstructorDecl *CD = dyn_cast(Method)) - completeClass(CD->getParent()); + completeUnusedClass(*CD->getParent()); llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( diff --git a/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp b/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp index cf2e89e35522f..cf7adad6b4492 100644 --- a/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp +++ b/clang/test/CodeGenCXX/debug-info-limited-ctor.cpp @@ -9,7 +9,7 @@ struct B { B(); } TestB; -// CHECK-DAG: !DICompositeType(tag: DW_TAG_structure_type, name: "C"{{.*}}DIFlagTypePassByValue +// CHECK-DAG: ![[C:[0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C"{{.*}}DIFlagTypePassByValue struct C { C() {} } TestC; @@ -73,3 +73,7 @@ void f(K k) {} void L() { auto func = [&]() {}; } + +// Check that types are being added to retained types list. +// CHECK-DAG: !DICompileUnit{{.*}}retainedTypes: ![[RETAINED:[0-9]+]] +// CHECK-DAG: ![[RETAINED]] = {{.*}}![[C]] From f71849c74ed58e5d9ed3681cc6294128098012dc Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 29 Sep 2020 17:07:06 -0700 Subject: [PATCH 104/544] [docs] Recommend dropLocation() over setDebugLoc(DebugLoc()) --- llvm/docs/HowToUpdateDebugInfo.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 3283bfd893393..7df2a8a258275 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -117,7 +117,7 @@ When to drop an instruction location A transformation should drop debug locations if the rules for :ref:`preserving` and :ref:`merging` debug locations do not apply. The API to -use is ``Instruction::setDebugLoc()``. +use is ``Instruction::dropLocation()``. The purpose of this rule is to prevent erratic or misleading single-stepping behavior in situations in which an instruction has no clear, unambiguous From 616c68aab75016d5d7ebc0b79bb3c38405b18ae6 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 14:38:56 -0700 Subject: [PATCH 105/544] [NFC][MSAN] Remove an attribute in test --- llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll index a8ce0561c3b87..54493c9cdc8c5 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define <4 x i64> @test_mm256_abs_epi8(<4 x i64> noundef %a) local_unnamed_addr #0 { +define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a) local_unnamed_addr #0 { ; CHECK-LABEL: @test_mm256_abs_epi8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 From 795d94fdb9d2377452f86952dcf0921a6c68d2b5 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 15:31:25 -0700 Subject: [PATCH 106/544] [NFC][Msan] Add llvm.fabs test llvm.fabs does not need a special handler as llvm.abs as its single argument type match the return type. --- .../MemorySanitizer/abs-vector.ll | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll index 54493c9cdc8c5..d3b29d65f2f22 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s -; RUN: opt < %s -S -msan-check-access-address=0 -msan | FileCheck %s -; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN -; RUN: opt < %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN +; RUN: opt %s -S -msan-check-access-address=0 -passes=msan 2>&1 | FileCheck %s +; RUN: opt %s -S -msan-check-access-address=0 -msan | FileCheck %s +; RUN: opt %s -S -msan-check-access-address=0 -msan-track-origins=2 -passes=msan 2>&1 | FileCheck %s --check-prefixes=CHECK,ORIGIN +; RUN: opt %s -S -msan-check-access-address=0 -msan-track-origins=2 -msan | FileCheck %s --check-prefixes=CHECK,ORIGIN target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -73,9 +73,26 @@ entry: ret <4 x i64> %2 } +define <4 x double> @test_fabs(<4 x double> %a) local_unnamed_addr #0 { +; CHECK-LABEL: @test_fabs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4 +; CHECK: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[A:%.*]]) +; CHECK-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], i32* @__msan_retval_origin_tls, align 4 +; CHECK: ret <4 x double> [[TMP2]] +; +entry: + %0 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %a) + ret <4 x double> %0 +} + declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) #1 declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) #1 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1 +declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1 attributes #0 = { nounwind readnone sanitize_memory } attributes #1 = { nounwind readnone speculatable willreturn } From afcf9c47c5e74a0b567531547b677ff1d383ae50 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 29 Sep 2020 17:08:42 -0700 Subject: [PATCH 107/544] Fix test failures with trunk clang - Make the consteval constructor for the zero type be noexcept - Don't expect three-way comparison of 0 against a comparison category to fail --- libcxx/include/compare | 2 +- .../cmp/cmp.categories.pre/zero_type.verify.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/include/compare b/libcxx/include/compare index c1cd81bb6fc1a..596505f8860d4 100644 --- a/libcxx/include/compare +++ b/libcxx/include/compare @@ -156,7 +156,7 @@ enum class _LIBCPP_ENUM_VIS _NCmpResult : signed char { struct _CmpUnspecifiedParam { _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEVAL - _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) {} + _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) noexcept {} template>> _CmpUnspecifiedParam(_Tp) = delete; diff --git a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp index 40f6677d43c9b..fc21c03a3ddf0 100644 --- a/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp +++ b/libcxx/test/std/language.support/cmp/cmp.categories.pre/zero_type.verify.cpp @@ -46,9 +46,9 @@ void test_category(T v) { void(0 > v); void(v >= 0); void(0 >= v); -#ifndef _LIBCPP_HAS_NO_THREE_WAY_COMPARISON - void(v <=> 0); // expected-error 3 {{}} - void(0 <=> v); // expected-error 3 {{}} +#ifndef _LIBCPP_HAS_NO_SPACESHIP_OPERATOR + void(v <=> 0); + void(0 <=> v); #endif } From bd14d6ea1517c93ceecaec29dad016d9a122fa1b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 17:22:16 -0700 Subject: [PATCH 108/544] [lldb] Hoist -s (trace directory) argument out of LLDB_TEST_COMMON_ARGS (NFC) Give the trace directory argument its own variable (LLDB_TEST_TRACE_DIRECTORY) so that we can configure it in lit.site.cfg.py if we so desire. --- lldb/test/API/CMakeLists.txt | 7 +++++-- lldb/test/API/lit.cfg.py | 3 +++ lldb/test/API/lit.site.cfg.py.in | 1 + lldb/utils/lldb-dotest/CMakeLists.txt | 5 +++++ lldb/utils/lldb-dotest/lldb-dotest.in | 10 ++++++---- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index fe92012e37678..f4802e2f5ca2c 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -36,13 +36,14 @@ set(LLDB_TEST_USER_ARGS # hash of filename and .text section, there *will* be conflicts inside # the build directory. set(LLDB_TEST_COMMON_ARGS - -s - ${CMAKE_BINARY_DIR}/lldb-test-traces -S nm -u CXXFLAGS -u CFLAGS ) +# Configure the traces directory. +set(LLDB_TEST_TRACE_DIRECTORY "${PROJECT_BINARY_DIR}/lldb-test-traces" CACHE PATH "The test traces directory.") + # Set the path to the default lldb test executable. set(LLDB_DEFAULT_TEST_EXECUTABLE "${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb${CMAKE_EXECUTABLE_SUFFIX}") @@ -141,6 +142,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_TRACE_DIRECTORY "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") @@ -170,6 +172,7 @@ endif() string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_DOTEST_ARGS "${LLDB_DOTEST_ARGS}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_SOURCE_DIR "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_BUILD_DIRECTORY "${LLDB_TEST_BUILD_DIRECTORY}") +string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_TRACE_DIRECTORY "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_EXECUTABLE "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index d78a1aae54675..a4d4d83fd366d 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -177,6 +177,9 @@ def delete_module_cache(path): if is_configured('lldb_build_directory'): dotest_cmd += ['--build-dir', config.lldb_build_directory] +if is_configured('lldb_trace_directory'): + dotest_cmd += ['-s', config.lldb_trace_directory] + if is_configured('lldb_module_cache'): delete_module_cache(config.lldb_module_cache) dotest_cmd += ['--lldb-module-cache-dir', config.lldb_module_cache] diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 271faf371f9d1..0481e8fecc73a 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -19,6 +19,7 @@ config.shared_libs = @LLVM_ENABLE_SHARED_LIBS@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.target_triple = "@TARGET_TRIPLE@" config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@" +config.lldb_trace_directory = "@LLDB_TEST_TRACE_DIRECTORY@" config.lldb_reproducer_directory = os.path.join("@LLDB_TEST_BUILD_DIRECTORY@", "reproducers") config.python_executable = "@Python3_EXECUTABLE@" config.dotest_args_str = "@LLDB_DOTEST_ARGS@" diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 1001fbf04ebe7..cba04f3499b95 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -23,6 +23,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -37,6 +38,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -50,6 +52,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -71,6 +74,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") @@ -89,6 +93,7 @@ else() set(LLDB_SOURCE_DIR_CONFIGURED "${LLDB_SOURCE_DIR}") set(LLDB_FRAMEWORK_DIR_CONFIGURED "${LLDB_FRAMEWORK_DIR}") set(LLDB_TEST_BUILD_DIRECTORY_CONFIGURED "${LLDB_TEST_BUILD_DIRECTORY}") + set(LLDB_TEST_TRACE_DIRECTORY_CONFIGURED "${LLDB_TEST_TRACE_DIRECTORY}") set(LLDB_TEST_EXECUTABLE_CONFIGURED "${LLDB_TEST_EXECUTABLE}") set(LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") set(LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in index cfd73f5b32a6e..d66968955a740 100755 --- a/lldb/utils/lldb-dotest/lldb-dotest.in +++ b/lldb/utils/lldb-dotest/lldb-dotest.in @@ -3,7 +3,6 @@ import subprocess import sys dotest_path = '@LLDB_SOURCE_DIR_CONFIGURED@/test/API/dotest.py' -build_dir = '@LLDB_TEST_BUILD_DIRECTORY_CONFIGURED@' dotest_args_str = '@LLDB_DOTEST_ARGS_CONFIGURED@' arch = '@LLDB_TEST_ARCH@' executable = '@LLDB_TEST_EXECUTABLE_CONFIGURED@' @@ -12,9 +11,11 @@ dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@' filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@' yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@' server = '@LLDB_TEST_SERVER_CONFIGURED@' -lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" -lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@" +lldb_build_dir = '@LLDB_TEST_BUILD_DIRECTORY_CONFIGURED@' lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@" +lldb_framework_dir = "@LLDB_FRAMEWORK_DIR_CONFIGURED@" +lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" +lldb_trace_dir = '@LLDB_TEST_TRACE_DIRECTORY_CONFIGURED@' if __name__ == '__main__': wrapper_args = sys.argv[1:] @@ -23,7 +24,8 @@ if __name__ == '__main__': cmd = [sys.executable, dotest_path] cmd.extend(['--arch', arch]) cmd.extend(dotest_args) - cmd.extend(['--build-dir', build_dir]) + cmd.extend(['-s', lldb_trace_dir]) + cmd.extend(['--build-dir', lldb_build_dir]) cmd.extend(['--executable', executable]) cmd.extend(['--compiler', compiler]) cmd.extend(['--dsymutil', dsymutil]) From 674f57870f4c8a7fd7b629bffc85b149cbefd3e0 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 29 Sep 2020 17:37:36 -0700 Subject: [PATCH 109/544] [gardening] Replace some uses of setDebugLoc(DebugLoc()) with dropLocation(), NFC --- llvm/lib/Transforms/Scalar/LICM.cpp | 2 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 631fa2f27c5b3..bc581e7ad40f3 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2159,7 +2159,7 @@ bool llvm::promoteLoopAccessesToScalars( if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); - PreheaderLoad->setDebugLoc(DebugLoc()); + PreheaderLoad->dropLocation(); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 124a7c423e72c..1672293380d7b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2218,7 +2218,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // be misleading while debugging. for (auto &I : *ThenBB) { if (!SpeculatedStoreValue || &I != SpeculatedStore) - I.setDebugLoc(DebugLoc()); + I.dropLocation(); I.dropUnknownNonDebugMetadata(); } @@ -2878,7 +2878,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // When we fold the bonus instructions we want to make sure we // reset their debug locations in order to avoid stepping on dead // code caused by folding dead branches. - NewBonusInst->setDebugLoc(DebugLoc()); + NewBonusInst->dropLocation(); RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); @@ -2902,7 +2902,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // Reset the condition debug location to avoid jumping on dead code // as the result of folding dead branches. - CondInPred->setDebugLoc(DebugLoc()); + CondInPred->dropLocation(); RemapInstruction(CondInPred, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); From d04775e16bba456f0be0aaa7478959c5bfa22c41 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 29 Sep 2020 08:51:26 -0400 Subject: [PATCH 110/544] Add remquo, frexp and modf overload functions to HIP header --- clang/lib/Headers/__clang_hip_math.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 0c27ef60a0648..b72bb40ccdb67 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -1221,6 +1221,27 @@ __DEVICE__ inline _Float16 pow(_Float16 __base, int __iexp) { return __ocml_pown_f16(__base, __iexp); } + +__DEVICE__ +inline float remquo(float __x, float __y, int *__quo) { + return remquof(__x, __y, __quo); +} + +template +__DEVICE__ + typename __hip_enable_if::is_specialized && + std::numeric_limits<__T2>::is_specialized, + double>::type + remquo(__T1 __x, __T2 __y, int *__quo) { + return remquo((double)__x, (double)__y, __quo); +} + +__DEVICE__ +inline float frexp(float __x, int *__nptr) { return frexpf(__x, __nptr); } + +__DEVICE__ +inline float modf(float __x, float *__iptr) { return modff(__x, __iptr); } + #endif #pragma pop_macro("__DEF_FUN1") From c6b18cf9672bca4f61bb3ef401173742068e46ea Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Tue, 29 Sep 2020 17:11:12 -0500 Subject: [PATCH 111/544] [RISCV] Use the extensions in the canonical order (NFC) Use the ISA extensions for specific processors in the conventional canonical order. --- llvm/lib/Target/RISCV/RISCV.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 578b393dc879a..66eda3ba360cf 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -231,16 +231,16 @@ def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; def : ProcessorModel<"bullet-rv32", BulletModel, []>; def : ProcessorModel<"bullet-rv64", BulletModel, [Feature64Bit]>; -def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtA, - FeatureStdExtC, - FeatureStdExtM]>; +def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtC]>; def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtF, FeatureStdExtA, - FeatureStdExtC, FeatureStdExtD, - FeatureStdExtF, - FeatureStdExtM]>; + FeatureStdExtC]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. From 0a146a9d0bdd54411f0b0712e27481a4c280ae03 Mon Sep 17 00:00:00 2001 From: Hubert Tong Date: Tue, 29 Sep 2020 21:11:16 -0400 Subject: [PATCH 112/544] [AIX] asm output: use character literals in byte lists for strings This patch improves the assembly output produced for string literals by using character literals in byte lists. This provides the benefits of having printable characters appear as such in the assembly output and of having strings kept as logical units on the same line. Reviewed By: daltenty Differential Revision: https://reviews.llvm.org/D80953 --- llvm/include/llvm/MC/MCAsmInfo.h | 23 +++++ llvm/lib/MC/MCAsmInfoXCOFF.cpp | 2 + llvm/lib/MC/MCAsmStreamer.cpp | 94 ++++++++++++++----- llvm/test/CodeGen/PowerPC/aix-bytestring.ll | 7 ++ llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll | 5 +- .../PowerPC/aix-xcoff-mergeable-str.ll | 25 +---- llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll | 5 +- 7 files changed, 108 insertions(+), 53 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-bytestring.ll diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 0f9d503045d88..2b889d0ed5fa9 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment }; /// This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { +public: + /// Assembly character literal syntax types. + enum AsmCharLiteralSyntax { + ACLS_Unknown, /// Unknown; character literals not used by LLVM for this + /// target. + ACLS_SingleQuotePrefix, /// The desired character is prefixed by a single + /// quote, e.g., `'A`. + }; + protected: //===------------------------------------------------------------------===// // Properties to be set by the target writer, used to configure asm printer. @@ -200,6 +209,16 @@ class MCAsmInfo { /// doesn't support this, it can be set to null. Defaults to "\t.asciz\t" const char *AscizDirective; + /// This directive accepts a comma-separated list of bytes for emission as a + /// string of bytes. For targets that do not support this, it shall be set to + /// null. Defaults to null. + const char *ByteListDirective = nullptr; + + /// Form used for character literals in the assembly syntax. Useful for + /// producing strings as byte lists. If a target does not use or support + /// this, it shall be set to ACLS_Unknown. Defaults to ACLS_Unknown. + AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown; + /// These directives are used to output some unit of integer data to the /// current section. If a data directive is set to null, smaller data /// directives will be used to emit the large sizes. Defaults to "\t.byte\t", @@ -562,6 +581,10 @@ class MCAsmInfo { } const char *getAsciiDirective() const { return AsciiDirective; } const char *getAscizDirective() const { return AscizDirective; } + const char *getByteListDirective() const { return ByteListDirective; } + AsmCharLiteralSyntax characterLiteralSyntax() const { + return CharacterLiteralSyntax; + } bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; } unsigned getTextAlignFillValue() const { return TextAlignFillValue; } const char *getGlobalDirective() const { return GlobalDirective; } diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp index b5c5bb3ace8ed..04982af4af31b 100644 --- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp @@ -24,6 +24,8 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() { ZeroDirectiveSupportsNonZeroValue = false; AsciiDirective = nullptr; // not supported AscizDirective = nullptr; // not supported + ByteListDirective = "\t.byte\t"; + CharacterLiteralSyntax = ACLS_SingleQuotePrefix; // Use .vbyte for data definition to avoid directives that apply an implicit // alignment. diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 647197d8de4d1..8d96935b22059 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -971,6 +971,47 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, static inline char toOctal(int X) { return (X&7)+'0'; } +static void PrintByteList(StringRef Data, raw_ostream &OS, + MCAsmInfo::AsmCharLiteralSyntax ACLS) { + assert(!Data.empty() && "Cannot generate an empty list."); + const auto printCharacterInOctal = [&OS](unsigned char C) { + OS << '0'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + }; + const auto printOneCharacterFor = [printCharacterInOctal]( + auto printOnePrintingCharacter) { + return [printCharacterInOctal, printOnePrintingCharacter](unsigned char C) { + if (isPrint(C)) { + printOnePrintingCharacter(static_cast(C)); + return; + } + printCharacterInOctal(C); + }; + }; + const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) { + const auto BeginPtr = Data.begin(), EndPtr = Data.end(); + for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) { + printOneCharacter(C); + OS << ','; + } + printOneCharacter(*(EndPtr - 1)); + }; + switch (ACLS) { + case MCAsmInfo::ACLS_Unknown: + printCharacterList(printCharacterInOctal); + return; + case MCAsmInfo::ACLS_SingleQuotePrefix: + printCharacterList(printOneCharacterFor([&OS](char C) { + const char AsmCharLitBuf[2] = {'\'', C}; + OS << StringRef(AsmCharLitBuf, sizeof(AsmCharLitBuf)); + })); + return; + } + llvm_unreachable("Invalid AsmCharLiteralSyntax value!"); +} + static void PrintQuotedString(StringRef Data, raw_ostream &OS) { OS << '"'; @@ -1009,33 +1050,42 @@ void MCAsmStreamer::emitBytes(StringRef Data) { "Cannot emit contents before setting section!"); if (Data.empty()) return; - // If only single byte is provided or no ascii or asciz directives is - // supported, emit as vector of 8bits data. - if (Data.size() == 1 || - !(MAI->getAscizDirective() || MAI->getAsciiDirective())) { - if (MCTargetStreamer *TS = getTargetStreamer()) { - TS->emitRawBytes(Data); + const auto emitAsString = [this](StringRef Data) { + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii or a byte-list directive + if (MAI->getAscizDirective() && Data.back() == 0) { + OS << MAI->getAscizDirective(); + Data = Data.substr(0, Data.size() - 1); + } else if (LLVM_LIKELY(MAI->getAsciiDirective())) { + OS << MAI->getAsciiDirective(); + } else if (MAI->getByteListDirective()) { + OS << MAI->getByteListDirective(); + PrintByteList(Data, OS, MAI->characterLiteralSyntax()); + EmitEOL(); + return true; } else { - const char *Directive = MAI->getData8bitsDirective(); - for (const unsigned char C : Data.bytes()) { - OS << Directive << (unsigned)C; - EmitEOL(); - } + return false; } + + PrintQuotedString(Data, OS); + EmitEOL(); + return true; + }; + + if (Data.size() != 1 && emitAsString(Data)) return; - } - // If the data ends with 0 and the target supports .asciz, use it, otherwise - // use .ascii - if (MAI->getAscizDirective() && Data.back() == 0) { - OS << MAI->getAscizDirective(); - Data = Data.substr(0, Data.size()-1); - } else { - OS << MAI->getAsciiDirective(); + // Only single byte is provided or no ascii, asciz, or byte-list directives + // are applicable. Emit as vector of individual 8bits data elements. + if (MCTargetStreamer *TS = getTargetStreamer()) { + TS->emitRawBytes(Data); + return; + } + const char *Directive = MAI->getData8bitsDirective(); + for (const unsigned char C : Data.bytes()) { + OS << Directive << (unsigned)C; + EmitEOL(); } - - PrintQuotedString(Data, OS); - EmitEOL(); } void MCAsmStreamer::emitBinaryData(StringRef Data) { diff --git a/llvm/test/CodeGen/PowerPC/aix-bytestring.ll b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll new file mode 100644 index 0000000000000..443c019c9e30c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll @@ -0,0 +1,7 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s + +@str = constant [256 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\7F\80\81\82\83\84\85\86\87\88\89\8A\8B\8C\8D\8E\8F\90\91\92\93\94\95\96\97\98\99\9A\9B\9C\9D\9E\9F\A0\A1\A2\A3\A4\A5\A6\A7\A8\A9\AA\AB\AC\AD\AE\AF\B0\B1\B2\B3\B4\B5\B6\B7\B8\B9\BA\BB\BC\BD\BE\BF\C0\C1\C2\C3\C4\C5\C6\C7\C8\C9\CA\CB\CC\CD\CE\CF\D0\D1\D2\D3\D4\D5\D6\D7\D8\D9\DA\DB\DC\DD\DE\DF\E0\E1\E2\E3\E4\E5\E6\E7\E8\E9\EA\EB\EC\ED\EE\EF\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\FA\FB\FC\FD\FE\FF\00", align 1 + +; CHECK-LABEL:str: +; CHECK-NEXT: .byte 0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,' ,'!,'",'#,'$,'%,'&,'','(,'),'*,'+,',,'-,'.,'/,'0,'1,'2,'3,'4,'5,'6,'7,'8,'9,':,';,'<,'=,'>,'?,'@,'A,'B,'C,'D,'E,'F,'G,'H,'I,'J,'K,'L,'M,'N,'O,'P,'Q,'R,'S,'T,'U,'V,'W,'X,'Y,'Z,'[,'\,'],'^,'_,'`,'a,'b,'c,'d,'e,'f,'g,'h,'i,'j,'k,'l,'m,'n,'o,'p,'q,'r,'s,'t,'u,'v,'w,'x,'y,'z,'{,'|,'},'~,0177,0200,0201,0202,0203,0204,0205,0206,0207,0210,0211,0212,0213,0214,0215,0216,0217,0220,0221,0222,0223,0224,0225,0226,0227,0230,0231,0232,0233,0234,0235,0236,0237,0240,0241,0242,0243,0244,0245,0246,0247,0250,0251,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,0270,0271,0272,0273,0274,0275,0276,0277,0300,0301,0302,0303,0304,0305,0306,0307,0310,0311,0312,0313,0314,0315,0316,0317,0320,0321,0322,0323,0324,0325,0326,0327,0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0345,0346,0347,0350,0351,0352,0353,0354,0355,0356,0357,0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0372,0373,0374,0375,0376,0377,0000 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll index 4083bd58fe98b..88c8b08bdb59f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll @@ -86,10 +86,7 @@ ; CHECK: .globl chrarray ; CHECK-NEXT: chrarray: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 'a,'b,'c,'d ; CHECK: .globl dblarr ; CHECK-NEXT: .align 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll index 42ead4b9b4de7..0d29857fd1556 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll @@ -41,30 +41,9 @@ entry: ; CHECK-NEXT: .vbyte 4, 0 # 0x0 ; CHECK-NEXT: .csect .rodata.str1.1[RO],2 ; CHECK-NEXT: L..strA: -; CHECK-NEXT: .byte 104 -; CHECK-NEXT: .byte 101 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 111 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 119 -; CHECK-NEXT: .byte 111 -; CHECK-NEXT: .byte 114 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 100 -; CHECK-NEXT: .byte 33 -; CHECK-NEXT: .byte 10 -; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 'h,'e,'l,'l,'o,' ,'w,'o,'r,'l,'d,'!,0012,0000 ; CHECK-NEXT: L...str: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 -; CHECK-NEXT: .byte 101 -; CHECK-NEXT: .byte 102 -; CHECK-NEXT: .byte 103 -; CHECK-NEXT: .byte 104 -; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 'a,'b,'c,'d,'e,'f,'g,'h,0000 ; CHECKOBJ: 00000010 <.rodata.str2.2>: ; CHECKOBJ-NEXT: 10: 01 08 01 10 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll index dddbe2ba089e8..a7bb018966429 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll @@ -53,10 +53,7 @@ ; CHECK64-NEXT: .vbyte 8, 0x408c200000000000 ; CHECK-NEXT: .globl const_chrarray ; CHECK-NEXT: const_chrarray: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 'a,'b,'c,'d ; CHECK-NEXT: .globl const_dblarr ; CHECK-NEXT: .align 3 ; CHECK-NEXT: const_dblarr: From 618a890b72f874cbc41168737d03f724f58805fc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Sep 2020 10:51:49 -0700 Subject: [PATCH 113/544] [X86] Increase the depth threshold required to form VPERMI2W/VPERMI2B in shuffle combining These instructions are implemented with two port 5 uops and one port 015 uop so they are more complicated that most shuffles. This patch increases the depth threshold for when we form them during shuffle combining to try to limit increasing the number of uops especially on port 5. Differential Revision: https://reviews.llvm.org/D88503 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +-- .../CodeGen/X86/min-legal-vector-width.ll | 36 +++--- .../CodeGen/X86/vector-shuffle-128-v16.ll | 45 ++----- .../test/CodeGen/X86/vector-shuffle-128-v8.ll | 112 ++++-------------- .../CodeGen/X86/vector-shuffle-256-v32.ll | 28 +---- llvm/test/CodeGen/X86/vector-zext.ll | 19 +-- 6 files changed, 71 insertions(+), 188 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2a7f028d37896..4b3adc7dcfbc9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35351,6 +35351,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Depth threshold above which we can efficiently use variable mask shuffles. int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2; AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; + // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a + // higher depth before combining them. + bool AllowBWIVPERMV3 = (Depth >= 2 || HasVariableMask); bool MaskContainsZeros = isAnyZero(Mask); @@ -35387,9 +35390,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasBWI() && + (Subtarget.hasBWI() && AllowBWIVPERMV3 && (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || - (Subtarget.hasVBMI() && + (Subtarget.hasVBMI() && AllowBWIVPERMV3 && (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { // Adjust shuffle mask - replace SM_SentinelZero with second source index. for (unsigned i = 0; i != NumMaskElts; ++i) @@ -35416,9 +35419,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && + (Subtarget.hasBWI() && AllowBWIVPERMV3 && (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || - (Subtarget.hasVBMI() && + (Subtarget.hasVBMI() && AllowBWIVPERMV3 && (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); @@ -35588,10 +35591,10 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasBWI() && (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || - MaskVT == MVT::v32i16)) || - (Subtarget.hasVBMI() && (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || - MaskVT == MVT::v64i8)))) { + (Subtarget.hasBWI() && AllowBWIVPERMV3 && + (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && AllowBWIVPERMV3 && + (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index e5240d5e246a6..a39fbf878fd9f 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -857,10 +857,10 @@ define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-wi define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i32_zeroes: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsrlq $48, 32(%rdi), %ymm1 -; CHECK-NEXT: vpsrlq $48, (%rdi), %ymm2 -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30] -; CHECK-NEXT: vpermi2w %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vpsrlq $48, 32(%rdi), %ymm0 +; CHECK-NEXT: vpsrlq $48, (%rdi), %ymm1 +; CHECK-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq %a = load <8 x i64>, <8 x i64>* %x %b = lshr <8 x i64> %a, @@ -920,9 +920,10 @@ define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vect define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i16_sign: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] -; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0 +; CHECK-NEXT: vpsrad $16, 32(%rdi), %ymm0 +; CHECK-NEXT: vpsrad $16, (%rdi), %ymm1 +; CHECK-NEXT: vpackssdw %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq %a = load <16 x i32>, <16 x i32>* %x %b = ashr <16 x i32> %a, @@ -931,20 +932,13 @@ define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal- } define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" { -; CHECK-AVX512-LABEL: trunc_v32i16_v32i8_sign: -; CHECK-AVX512: # %bb.0: -; CHECK-AVX512-NEXT: vpsraw $8, 32(%rdi), %ymm0 -; CHECK-AVX512-NEXT: vpsraw $8, (%rdi), %ymm1 -; CHECK-AVX512-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 -; CHECK-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; CHECK-AVX512-NEXT: retq -; -; CHECK-VBMI-LABEL: trunc_v32i16_v32i8_sign: -; CHECK-VBMI: # %bb.0: -; CHECK-VBMI-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] -; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 -; CHECK-VBMI-NEXT: retq +; CHECK-LABEL: trunc_v32i16_v32i8_sign: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsraw $8, 32(%rdi), %ymm0 +; CHECK-NEXT: vpsraw $8, (%rdi), %ymm1 +; CHECK-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; CHECK-NEXT: retq %a = load <32 x i16>, <32 x i16>* %x %b = ashr <32 x i16> %a, %c = trunc <32 x i16> %b to <32 x i8> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index fb300a88b4120..ee3cf43e8f2f7 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -304,24 +304,11 @@ define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07( ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX2-NEXT: retq -; -; AVX512VLBW-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,16,0,17,0,18,0,19,0,20,0,21,0,22,0,23] -; AVX512VLVBMI-NEXT: vpermi2b %xmm0, %xmm1, %xmm2 -; AVX512VLVBMI-NEXT: vmovdqa %xmm2, %xmm0 -; AVX512VLVBMI-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2OR512VL-NEXT: retq ; ; XOPAVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: ; XOPAVX1: # %bb.0: @@ -1335,23 +1322,11 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23( ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] -; AVX1OR2-NEXT: retq -; -; AVX512VLBW-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX512VLBW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,16,1,17,4,20,5,21,2,18,3,19,6,22,7,23] -; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 -; AVX512VLVBMI-NEXT: retq +; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: +; AVX: # %bb.0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> ret <16 x i8> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index c72d736960f96..f7baebf7c4e4f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1017,23 +1017,11 @@ define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) { ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_0c1d2e3f: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_0c1d2e3f: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_0c1d2e3f: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,12,1,13,2,14,3,15] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_0c1d2e3f: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1059,23 +1047,11 @@ define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) { ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_48596a7b: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_48596a7b: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_48596a7b: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8,5,9,6,10,7,11] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_48596a7b: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1424,23 +1400,11 @@ define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_012dXXXX: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_012dXXXX: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_012dXXXX: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,4,5,6,7] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_012dXXXX: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1475,24 +1439,11 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v8i16_XXXXcde3: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] -; AVX2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXXXcde3: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_XXXXcde3: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,11] -; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2 -; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] +; AVX2OR512VL-NEXT: retq ; ; XOPAVX1-LABEL: shuffle_v8i16_XXXXcde3: ; XOPAVX1: # %bb.0: @@ -1533,24 +1484,11 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1OR2-LABEL: shuffle_v8i16_cde3XXXX: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] -; AVX1OR2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i16_cde3XXXX: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i16_cde3XXXX: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,11,4,5,6,7] -; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2 -; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX-LABEL: shuffle_v8i16_cde3XXXX: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 23bf91de6e7e8..e3eed625dab3b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -4804,29 +4804,11 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: PR28136: -; AVX2: # %bb.0: -; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: retq -; -; AVX512VLBW-LABEL: PR28136: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-SLOW-LABEL: PR28136: -; AVX512VLVBMI-SLOW: # %bb.0: -; AVX512VLVBMI-SLOW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512VLVBMI-SLOW-NEXT: retq -; -; AVX512VLVBMI-FAST-LABEL: PR28136: -; AVX512VLVBMI-FAST: # %bb.0: -; AVX512VLVBMI-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,16,48,17,49,18,50,19,51,4,36,5,37,6,38,7,39,20,52,21,53,22,54,23,55] -; AVX512VLVBMI-FAST-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 -; AVX512VLVBMI-FAST-NEXT: retq +; AVX2OR512VL-LABEL: PR28136: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] +; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2OR512VL-NEXT: retq ; ; XOPAVX1-LABEL: PR28136: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index 2ad16f2e04c5b..0132e901e6b3a 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -1902,20 +1902,11 @@ define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX2-NEXT: retq ; -; AVX512F-LABEL: shuf_zext_8i16_to_4i64_offset2: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] -; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: shuf_zext_8i16_to_4i64_offset2: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,33,34,35,3,37,38,39,4,41,42,43,5,45,46,47] -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512BW-NEXT: retq +; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] +; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <4 x i64> From 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 29 Sep 2020 14:39:54 -0700 Subject: [PATCH 114/544] [GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR. During lowering of G_UMULO and friends, the previous code moved the builder's insertion point to be after the legalizing instruction. When that happened, if there happened to be a "G_CONSTANT i32 0" immediately after, the CSEMIRBuilder would try to find that constant during the buildConstant(zero) call, and since it dominates itself would return the iterator unchanged, even though the def of the constant was *after* the current insertion point. This resulted in the compare being generated *before* the constant which it was using. There's no need to modify the insertion point before building the mul-hi or constant. Delaying moving the insert point ensures those are built/CSEd before the G_ICMP is built. Fixes PR47679 Differential Revision: https://reviews.llvm.org/D88514 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 5 +- .../AArch64/GlobalISel/legalize-mul.mir | 68 ++++++++++++++++++- .../CodeGen/Mips/GlobalISel/legalizer/mul.mir | 2 +- .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll | 12 ++-- 4 files changed, 76 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e8bc4067c127e..45ac2b7b67119 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2892,11 +2892,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { MI.RemoveOperand(1); Observer.changedInstr(MI); - MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS}); auto Zero = MIRBuilder.buildConstant(Ty, 0); + // Move insert point forward so we can use the Res register if needed. + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + // For *signed* multiply, overflow is detected by checking: // (hi != (lo >> bitwidth-1)) if (Opcode == TargetOpcode::G_SMULH) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index 84c839f7b341b..20af216aaeb5e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -28,8 +28,8 @@ body: | ; CHECK-LABEL: name: test_smul_overflow ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]] + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]](s64) ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]] @@ -51,9 +51,9 @@ body: | ; CHECK-LABEL: name: test_umul_overflow ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]] ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] ; CHECK: $x0 = COPY [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) @@ -91,3 +91,67 @@ body: | $q0 = COPY %2(<2 x s64>) RET_ReallyLR implicit $q0 ... +--- +name: test_umulo_overflow_no_invalid_mir +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 16 +stack: + - { id: 0, size: 8, alignment: 8 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 16, alignment: 16 } + - { id: 3, size: 16, alignment: 8 } +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0 + ; before it's been defined. + ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3 + ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8) + ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] + ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; CHECK: $x0 = COPY [[MUL]](s64) + ; CHECK: $x1 = COPY [[AND]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %0:_(p0) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %25:_(s32) = G_CONSTANT i32 0 + %3:_(p0) = G_FRAME_INDEX %stack.0 + %4:_(p0) = G_FRAME_INDEX %stack.1 + %6:_(p0) = G_FRAME_INDEX %stack.3 + G_STORE %2(s64), %3(p0) :: (store 8) + G_STORE %1(s64), %4(p0) :: (store 8) + %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8) + %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8) + %9:_(s64), %10:_(s1) = G_UMULO %7, %8 + %31:_(s64) = G_CONSTANT i64 0 + G_STORE %31(s64), %6(p0) :: (store 8, align 1) + %16:_(s64) = G_ZEXT %10(s1) + $x0 = COPY %9(s64) + $x1 = COPY %16(s64) + RET_ReallyLR implicit $x0 + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index c92a55d0af322..b146aa5ff13d5 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -439,9 +439,9 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index 659eadf181c02..f7250ccde898f 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) { ; MIPS32-LABEL: umul_with_overflow: ; MIPS32: # %bb.0: -; MIPS32-NEXT: mul $1, $4, $5 ; MIPS32-NEXT: multu $4, $5 -; MIPS32-NEXT: mfhi $2 -; MIPS32-NEXT: sltu $2, $zero, $2 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: sb $2, 0($7) -; MIPS32-NEXT: sw $1, 0($6) +; MIPS32-NEXT: mfhi $1 +; MIPS32-NEXT: mul $2, $4, $5 +; MIPS32-NEXT: sltu $1, $zero, $1 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sb $1, 0($7) +; MIPS32-NEXT: sw $2, 0($6) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) From 6f01c53f26af7fb0393464079ec5e839a497d4da Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 29 Sep 2020 22:17:12 -0400 Subject: [PATCH 115/544] Remove further OpenBSD/sparc bits --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index d545bb5514741..e3723e213c52f 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1057,8 +1057,6 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { break; case llvm::Triple::ppc: - case llvm::Triple::sparc: - case llvm::Triple::sparcel: case llvm::Triple::sparcv9: IsPICLevelTwo = true; // "-fPIE" break; From e6e73712ddfa18d4a2937a5775990dcefc8bd2f7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 29 Sep 2020 20:12:00 -0700 Subject: [PATCH 116/544] [gn build] Add missing dependency to Extensions --- llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn index dccbd5c2d530a..e580187ff96ea 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn @@ -1,4 +1,5 @@ static_library("Extensions") { output_name = "LLVMExtensions" sources = [ "Extensions.cpp" ] + deps = [ "//llvm/lib/Support" ] } From 1c5aa8aeca29c7d4b891e5b60b25fdb74f9bf0e9 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Wed, 30 Sep 2020 03:55:54 +0000 Subject: [PATCH 117/544] [mlir] Update docs referencing OpTrait::Symbol. Since https://reviews.llvm.org/D78522, Symbol is not a Trait itself. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D88512 --- mlir/docs/Interfaces.md | 4 ++++ mlir/docs/SymbolsAndSymbolTables.md | 8 ++++---- mlir/docs/Traits.md | 7 ------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/mlir/docs/Interfaces.md b/mlir/docs/Interfaces.md index 3eb3bd8818775..633e43e42da45 100644 --- a/mlir/docs/Interfaces.md +++ b/mlir/docs/Interfaces.md @@ -228,3 +228,7 @@ format of the header for each interface section goes as follows: - RegionKind::Graph - represents a graph region without control flow semantics - RegionKind::SSACFG - represents an [SSA-style control flow](LangRef.md#modeling-control-flow) region with basic blocks and reachability - `hasSSADominance(unsigned index)` - Return true if the region with the given index inside this operation requires dominance. + +##### SymbolInterfaces + +* `SymbolOpInterface` - Used to represent [`Symbol`](SymbolsAndSymbolTables.md#symbol) operations which reside immediately within a region that defines a [`SymbolTable`](SymbolsAndSymbolTables.md#symbol-table). diff --git a/mlir/docs/SymbolsAndSymbolTables.md b/mlir/docs/SymbolsAndSymbolTables.md index c004435fc040a..2b4301e43d2fa 100644 --- a/mlir/docs/SymbolsAndSymbolTables.md +++ b/mlir/docs/SymbolsAndSymbolTables.md @@ -37,10 +37,10 @@ link, or use, to the symbol. An example of a `Symbol` operation is ### Defining a Symbol -A `Symbol` operation may use the `OpTrait::Symbol` trait to provide the -necessary verification and accessors, but this is not required as some -operations, such as `module`, conditionally define a symbol. `Symbol`s must have -the following properties: +A `Symbol` operation should use the `SymbolOpInterface` interface to provide the +necessary verification and accessors; it also supports +operations, such as `module`, that conditionally define a symbol. `Symbol`s must +have the following properties: * A `StringAttr` attribute named 'SymbolTable::getSymbolAttrName()'(`sym_name`). diff --git a/mlir/docs/Traits.md b/mlir/docs/Traits.md index 5867f220e97b4..3fa56249ae429 100644 --- a/mlir/docs/Traits.md +++ b/mlir/docs/Traits.md @@ -267,13 +267,6 @@ associated with that memory reference. This trait provides APIs and verifiers for operations with regions that have a single block that must terminate with `TerminatorOpType`. -### Symbol - -* `OpTrait::Symbol` -- `Symbol` - -This trait is used for operations that define a -[`Symbol`](SymbolsAndSymbolTables.md#symbol). - ### SymbolTable * `OpTrait::SymbolTable` -- `SymbolTable` From 4e4f926e83cf77f0d36b821a3d2aa1de78338a82 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Wed, 30 Sep 2020 11:07:55 +0700 Subject: [PATCH 118/544] Remove test AST/const-fpfeatures-diag.c This test is going to be removed because using dynamic rounding mode in initializers is changing. It also causes build failures in some cases, so remove it now. --- clang/test/AST/const-fpfeatures-diag.c | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 clang/test/AST/const-fpfeatures-diag.c diff --git a/clang/test/AST/const-fpfeatures-diag.c b/clang/test/AST/const-fpfeatures-diag.c deleted file mode 100644 index d0408dae36631..0000000000000 --- a/clang/test/AST/const-fpfeatures-diag.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_cc1 -verify -ffp-exception-behavior=strict -Wno-unknown-pragmas %s - -// REQUIRES: x86-registered-target - -#pragma STDC FENV_ROUND FE_DYNAMIC - -// nextUp(1.F) == 0x1.000002p0F - -float F1 = 0x1.000000p0F + 0x0.000002p0F; -float F2 = 0x1.000000p0F + 0x0.000001p0F; // expected-error{{initializer element is not a compile-time constant}} From 154860af338f7b0c82cb04e91d6f199aa72cfdff Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Sep 2020 23:00:18 -0700 Subject: [PATCH 119/544] [lldb] Use config.lldb_src_root in lit_config.load_config (NFC) Rather than relaying on CMake to substitute the full path to the lldb source root, use the value set in config.lldb_src_root. This makes it slightly easier to write a custom lit.site.cfg.py. --- lldb/test/API/lit.site.cfg.py.in | 2 +- lldb/test/Shell/lit.site.cfg.py.in | 3 ++- lldb/test/Unit/lit.site.cfg.py.in | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 0481e8fecc73a..ce2ff8e21d0b9 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -69,4 +69,4 @@ except KeyError as e: lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) # Let the main config do the real work. -lit_config.load_config(config, "@LLDB_SOURCE_DIR@/test/API/lit.cfg.py") +lit_config.load_config(config, os.path.join(config.lldb_src_root, "test", "API", "lit.cfg.py")) diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index ff4de9d527dea..6cddd3937628d 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -6,6 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.llvm_libs_dir = "@LLVM_LIBS_DIR@" config.llvm_shlib_dir = "@SHLIBDIR@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" +config.lldb_src_root = "@LLDB_SOURCE_DIR@" config.lldb_obj_root = "@LLDB_BINARY_DIR@" config.lldb_libs_dir = "@LLDB_LIBS_DIR@" config.lldb_tools_dir = "@LLDB_TOOLS_DIR@" @@ -42,4 +43,4 @@ import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. -lit_config.load_config(config, "@LLDB_SOURCE_DIR@/test/Shell/lit.cfg.py") +lit_config.load_config(config, os.path.join(config.lldb_src_root, "test", "Shell", "lit.cfg.py")) diff --git a/lldb/test/Unit/lit.site.cfg.py.in b/lldb/test/Unit/lit.site.cfg.py.in index e2035d678cd98..c0627b772362f 100644 --- a/lldb/test/Unit/lit.site.cfg.py.in +++ b/lldb/test/Unit/lit.site.cfg.py.in @@ -26,4 +26,4 @@ import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. -lit_config.load_config(config, "@LLDB_SOURCE_DIR@/test/Unit/lit.cfg.py") +lit_config.load_config(config, os.path.join(config.lldb_src_root, "test", "Unit", "lit.cfg.py")) From 195c22f2733cf923b932412f0fe212f4ef397d2c Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 24 Sep 2020 14:02:53 +0100 Subject: [PATCH 120/544] [ARM] Change VPT state assertion Just because we haven't encountered an instruction setting the VPR, it doesn't mean we can't create a VPT block - the VPR maybe a live-in. Differential Revision: https://reviews.llvm.org/D88224 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 3 +- .../begin-vpt-without-inst.mir | 117 ++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 636359d801d9c..72e772e7bb516 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -187,7 +187,8 @@ namespace { std::unique_ptr> PredicatedInsts; static void CreateVPTBlock(MachineInstr *MI) { - assert(CurrentPredicates.size() && "Can't begin VPT without predicate"); + assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR)) + && "Can't begin VPT without predicate"); Blocks.emplace_back(MI); // The execution of MI is predicated upon the current set of instructions // that are AND'ed together to form the VPR predicate value. In the case diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir new file mode 100644 index 0000000000000..1930acad3ec49 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir @@ -0,0 +1,117 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + @arr = external dso_local local_unnamed_addr global [0 x i32], align 4 + + define dso_local arm_aapcs_vfpcc void @foo(i32 %i) { + entry: + %tobool.not11 = icmp eq i32 %i, 0 + br i1 %tobool.not11, label %for.end5, label %vector.ph.preheader + + vector.ph.preheader: ; preds = %entry + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3) + br label %vector.ph + + vector.ph: ; preds = %vector.ph.preheader, %vector.ph + %i.addr.012 = phi i32 [ %math, %vector.ph ], [ %i, %vector.ph.preheader ] + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> , <4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*), i32 4, <4 x i1> %active.lane.mask) + %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.addr.012, i32 1) + %math = extractvalue { i32, i1 } %0, 0 + %ov = extractvalue { i32, i1 } %0, 1 + br i1 %ov, label %for.end5, label %vector.ph + + for.end5: ; preds = %vector.ph, %entry + ret void + } + + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) + declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) + +... +--- +name: foo +alignment: 8 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + maxAlignment: 1 + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: + - id: 0 + value: '<4 x i32> ' + alignment: 8 + isTargetSpecific: false +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.3(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $r0 + ; CHECK: tCBZ $r0, %bb.3 + ; CHECK: bb.1.vector.ph.preheader: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0 + ; CHECK: renamable $r1 = tLEApcrel %const.0, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 3, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @arr, 14 /* CC::al */, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @arr, 14 /* CC::al */, $noreg + ; CHECK: renamable $vpr = MVE_VCMPu32 killed renamable $q0, killed renamable $q1, 8, 0, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 2, 0, $noreg, undef renamable $q0 + ; CHECK: bb.2.vector.ph: + ; CHECK: successors: %bb.3(0x04000000), %bb.2(0x7c000000) + ; CHECK: liveins: $vpr, $q0, $r0, $r1 + ; CHECK: renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store 16 into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) + ; CHECK: tBcc %bb.2, 3 /* CC::lo */, killed $cpsr + ; CHECK: bb.3.for.end5: + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + ; CHECK: bb.4 (align 8): + ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 16 + bb.0.entry: + successors: %bb.3(0x30000000), %bb.1(0x50000000) + liveins: $r0 + + tCBZ $r0, %bb.3 + + bb.1.vector.ph.preheader: + successors: %bb.2(0x80000000) + liveins: $r0 + + renamable $r1 = tLEApcrel %const.0, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 3, 0, $noreg, undef renamable $q0 + renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + $r1 = t2MOVi16 target-flags(arm-lo16) @arr, 14 /* CC::al */, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @arr, 14 /* CC::al */, $noreg + renamable $vpr = MVE_VCMPu32 killed renamable $q0, killed renamable $q1, 8, 0, $noreg + renamable $q0 = MVE_VMOVimmi32 2, 0, $noreg, undef renamable $q0 + + bb.2.vector.ph: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + liveins: $vpr, $q0, $r0, $r1 + + renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + MVE_VPST 8, implicit $vpr + MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store 16 into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) + tBcc %bb.2, 3 /* CC::lo */, killed $cpsr + + bb.3.for.end5: + tBX_RET 14 /* CC::al */, $noreg + + bb.4 (align 8): + CONSTPOOL_ENTRY 0, %const.0, 16 + +... From 700f93e92b6d4cdbab66133f75c143c9677f2d41 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 28 Sep 2020 15:46:56 +0100 Subject: [PATCH 121/544] [RDA] Switch isSafeToMove iterators So forwards is forwards and backwards is reverse. Also add a check so that we know the instructions are in the expected order. Differential Revision: https://reviews.llvm.org/D88419 --- llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 16 +++++++++--- .../Thumb2/LowOverheadLoops/it-block-mov.mir | 2 +- .../lstp-insertion-position.mir | 2 +- .../Thumb2/LowOverheadLoops/mov-operand.ll | 2 +- .../move-def-before-start.mir | 25 +++++++++++++------ .../LowOverheadLoops/move-start-after-def.mir | 21 ++++++++++------ 6 files changed, 48 insertions(+), 20 deletions(-) diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index f553bad31b943..63989bd2317f3 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -540,7 +540,7 @@ static bool mayHaveSideEffects(MachineInstr &MI) { template bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From, MachineInstr *To) const { - if (From->getParent() != To->getParent()) + if (From->getParent() != To->getParent() || From == To) return false; SmallSet Defs; @@ -569,12 +569,22 @@ bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From, bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const { - return isSafeToMove(From, To); + using Iterator = MachineBasicBlock::iterator; + // Walk forwards until we find the instruction. + for (auto I = Iterator(From), E = From->getParent()->end(); I != E; ++I) + if (&*I == To) + return isSafeToMove(From, To); + return false; } bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const { - return isSafeToMove(From, To); + using Iterator = MachineBasicBlock::reverse_iterator; + // Walk backwards until we find the instruction. + for (auto I = Iterator(From), E = From->getParent()->rend(); I != E; ++I) + if (&*I == To) + return isSafeToMove(From, To); + return false; } bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir index 429a88884db91..f63d3fde7dee7 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir @@ -78,8 +78,8 @@ body: | ; CHECK: successors: %bb.5(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg - ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: $lr = t2DLS killed $r4 + ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0 ; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir index 92e2a54cffa9b..3e7c87de0282c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir @@ -292,9 +292,9 @@ body: | ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) - ; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index c176fcabdfb61..b97204c69f321 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -26,9 +26,9 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 -; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: dls lr, r4 ; CHECK-NEXT: vadd.f32 s0, s3, s3 +; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 ; CHECK-NEXT: vmov r12, s0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir index 734bcc106785e..ea3589f48fdb7 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir @@ -117,21 +117,32 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir index a8f084474b0c7..0295acb67962d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir @@ -118,24 +118,31 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: dead renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: From 834b6470d9f111c355053ecff8bed71bf44a6624 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 08:36:48 +0100 Subject: [PATCH 122/544] [NFC][ARM] Add LowOverheadLoop test --- .../LowOverheadLoops/it-block-chain-store.mir | 301 ++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir new file mode 100644 index 0000000000000..c5713c8224b5e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir @@ -0,0 +1,301 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + define hidden arm_aapcs_vfpcc void @it_block_store_count_before_start(float* %pSrc, float* %pDst, i32 %blockSize, i32* %iter.addr) #0 { + entry: + %mul = shl i32 %blockSize, 1 + %0 = add i32 %mul, 3 + %1 = icmp slt i32 %mul, 4 + %smin = select i1 %1, i32 %mul, i32 4 + %2 = sub i32 %0, %smin + %3 = lshr i32 %2, 2 + %4 = add nuw nsw i32 %3, 1 + store i32 %4, i32* %iter.addr, align 4 + call void @llvm.set.loop.iterations.i32(i32 %4) + br label %do.body + + do.body: ; preds = %do.body, %entry + %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %4, %entry ] + %blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ] + %pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pDst.addr.01 = bitcast float* %pDst.addr.0 to <4 x float>* + %pSrc.addr.02 = bitcast float* %pSrc.addr.0 to <4 x float>* + %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) + %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef) + %7 = fmul <4 x float> %6, %6 + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %7, <4 x float>* %pDst.addr.01, i32 4, <4 x i1> %5) + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %add.ptr4 = getelementptr inbounds float, float* %pDst.addr.0, i32 4 + %sub = add nsw i32 %blkCnt.0, -4 + %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) + %9 = icmp ne i32 %8, 0 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + br i1 %9, label %do.body, label %do.end + + do.end: ; preds = %do.body + ret void + } + + define hidden arm_aapcs_vfpcc void @it_block_store_count_after_start(float* %pSrc, float* %pDst, i32 %blockSize, i32* %iter.addr) #0 { + entry: + %mul = shl i32 %blockSize, 1 + %0 = add i32 %mul, 3 + %1 = icmp slt i32 %mul, 4 + %smin = select i1 %1, i32 %mul, i32 4 + %2 = sub i32 %0, %smin + %3 = lshr i32 %2, 2 + %4 = add nuw nsw i32 %3, 1 + call void @llvm.set.loop.iterations.i32(i32 %4) + store i32 %4, i32* %iter.addr, align 4 + br label %do.body + + do.body: ; preds = %do.body, %entry + %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %4, %entry ] + %blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ] + %pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pDst.addr.01 = bitcast float* %pDst.addr.0 to <4 x float>* + %pSrc.addr.02 = bitcast float* %pSrc.addr.0 to <4 x float>* + %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) + %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef) + %7 = fmul <4 x float> %6, %6 + tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %7, <4 x float>* %pDst.addr.01, i32 4, <4 x i1> %5) + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %add.ptr4 = getelementptr inbounds float, float* %pDst.addr.0, i32 4 + %sub = add nsw i32 %blkCnt.0, -4 + %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) + %9 = icmp ne i32 %8, 0 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + br i1 %9, label %do.body, label %do.end + + do.end: ; preds = %do.body + ret void + } + + ; Function Attrs: nounwind readnone + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 + + ; Function Attrs: argmemonly nounwind readonly willreturn + declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 + + ; Function Attrs: argmemonly nounwind willreturn writeonly + declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) #3 + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #4 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4 + + attributes #0 = { "target-features"="+mve.fp" } + attributes #1 = { nounwind readnone "target-features"="+mve.fp" } + attributes #2 = { argmemonly nounwind readonly willreturn "target-features"="+mve.fp" } + attributes #3 = { argmemonly nounwind willreturn writeonly "target-features"="+mve.fp" } + attributes #4 = { noduplicate nounwind "target-features"="+mve.fp" } + +... +--- +name: it_block_store_count_before_start +alignment: 2 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: it_block_store_count_before_start + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate + ; CHECK: renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 + ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: bb.1.do.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4) + ; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4) + ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.do.end: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg + t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate + renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg + renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg + t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + t2DoLoopStart renamable $lr + $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg + + bb.1.do.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $r0, $r1, $r2, $r12 + + $lr = tMOVr $r2, 14 /* CC::al */, $noreg + renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg + renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg + renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4) + renamable $lr = t2LoopDec killed renamable $lr, 1 + renamable $q0 = MVE_VMULf32 killed renamable $q0, renamable $q0, 0, $noreg, undef renamable $q0 + MVE_VPST 8, implicit $vpr + renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4) + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.do.end: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + +... +--- +name: it_block_store_count_after_start +alignment: 2 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: it_block_store_count_after_start + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate + ; CHECK: renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 + ; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: bb.1.do.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4) + ; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4) + ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.do.end: + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg + renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg + t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 11, 8, implicit-def $itstate + $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate + renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg + renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg + renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg + t2DoLoopStart renamable $lr + t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg + + bb.1.do.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $r0, $r1, $r2, $r12 + + $lr = tMOVr $r2, 14 /* CC::al */, $noreg + renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg + renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg + renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4) + renamable $lr = t2LoopDec killed renamable $lr, 1 + renamable $q0 = MVE_VMULf32 killed renamable $q0, renamable $q0, 0, $noreg, undef renamable $q0 + MVE_VPST 8, implicit $vpr + renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4) + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.do.end: + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc + +... From 7ba0779fbb41b6fa8213aa31622ff45484037eb4 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 29 Sep 2020 10:37:46 +0200 Subject: [PATCH 123/544] [clangd] Extract options struct for ClangdLSPServer. NFC In preparation for making moving TweakFilter from ClangdServer::Options to a ClangdLSPServer option, and letting it vary per-request. (In order to implement CodeActionParams.only) Also a general overdue cleanup. Differential Revision: https://reviews.llvm.org/D88470 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 88 +++++++++---------- clang-tools-extra/clangd/ClangdLSPServer.h | 36 ++++---- clang-tools-extra/clangd/tool/ClangdMain.cpp | 45 +++++----- .../clangd/unittests/ClangdLSPServerTests.cpp | 15 ++-- 4 files changed, 84 insertions(+), 100 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index fa4a4ab86a8ce..a85736b948300 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -395,7 +395,7 @@ class ClangdLSPServer::MessageHandler : public Transport::MessageHandler { Context handlerContext() const { return Context::current().derive( kCurrentOffsetEncoding, - Server.NegotiatedOffsetEncoding.getValueOr(OffsetEncoding::UTF16)); + Server.Opts.OffsetEncoding.getValueOr(OffsetEncoding::UTF16)); } // We run cancelable requests in a context that does two things: @@ -465,43 +465,42 @@ static std::vector semanticTokenTypes() { void ClangdLSPServer::onInitialize(const InitializeParams &Params, Callback Reply) { // Determine character encoding first as it affects constructed ClangdServer. - if (Params.capabilities.offsetEncoding && !NegotiatedOffsetEncoding) { - NegotiatedOffsetEncoding = OffsetEncoding::UTF16; // fallback + if (Params.capabilities.offsetEncoding && !Opts.OffsetEncoding) { + Opts.OffsetEncoding = OffsetEncoding::UTF16; // fallback for (OffsetEncoding Supported : *Params.capabilities.offsetEncoding) if (Supported != OffsetEncoding::UnsupportedEncoding) { - NegotiatedOffsetEncoding = Supported; + Opts.OffsetEncoding = Supported; break; } } - ClangdServerOpts.TheiaSemanticHighlighting = + Opts.TheiaSemanticHighlighting = Params.capabilities.TheiaSemanticHighlighting; if (Params.capabilities.TheiaSemanticHighlighting && Params.capabilities.SemanticTokens) { log("Client supports legacy semanticHighlights notification and standard " "semanticTokens request, choosing the latter (no notifications)."); - ClangdServerOpts.TheiaSemanticHighlighting = false; + Opts.TheiaSemanticHighlighting = false; } if (Params.rootUri && *Params.rootUri) - ClangdServerOpts.WorkspaceRoot = std::string(Params.rootUri->file()); + Opts.WorkspaceRoot = std::string(Params.rootUri->file()); else if (Params.rootPath && !Params.rootPath->empty()) - ClangdServerOpts.WorkspaceRoot = *Params.rootPath; + Opts.WorkspaceRoot = *Params.rootPath; if (Server) return Reply(llvm::make_error("server already initialized", ErrorCode::InvalidRequest)); if (const auto &Dir = Params.initializationOptions.compilationDatabasePath) - CompileCommandsDir = Dir; - if (UseDirBasedCDB) { + Opts.CompileCommandsDir = Dir; + if (Opts.UseDirBasedCDB) { BaseCDB = std::make_unique( - CompileCommandsDir); - BaseCDB = getQueryDriverDatabase( - llvm::makeArrayRef(ClangdServerOpts.QueryDriverGlobs), - std::move(BaseCDB)); + Opts.CompileCommandsDir); + BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), + std::move(BaseCDB)); } auto Mangler = CommandMangler::detect(); - if (ClangdServerOpts.ResourceDir) - Mangler.ResourceDir = *ClangdServerOpts.ResourceDir; + if (Opts.ResourceDir) + Mangler.ResourceDir = *Opts.ResourceDir; CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, tooling::ArgumentsAdjuster(std::move(Mangler))); { @@ -510,19 +509,18 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, // Server, CDB, etc. WithContext MainContext(BackgroundContext.clone()); llvm::Optional WithOffsetEncoding; - if (NegotiatedOffsetEncoding) - WithOffsetEncoding.emplace(kCurrentOffsetEncoding, - *NegotiatedOffsetEncoding); - Server.emplace(*CDB, TFS, ClangdServerOpts, + if (Opts.OffsetEncoding) + WithOffsetEncoding.emplace(kCurrentOffsetEncoding, *Opts.OffsetEncoding); + Server.emplace(*CDB, TFS, Opts, static_cast(this)); } applyConfiguration(Params.initializationOptions.ConfigSettings); - CCOpts.EnableSnippets = Params.capabilities.CompletionSnippets; - CCOpts.IncludeFixIts = Params.capabilities.CompletionFixes; - if (!CCOpts.BundleOverloads.hasValue()) - CCOpts.BundleOverloads = Params.capabilities.HasSignatureHelp; - CCOpts.DocumentationFormat = + Opts.CodeComplete.EnableSnippets = Params.capabilities.CompletionSnippets; + Opts.CodeComplete.IncludeFixIts = Params.capabilities.CompletionFixes; + if (!Opts.CodeComplete.BundleOverloads.hasValue()) + Opts.CodeComplete.BundleOverloads = Params.capabilities.HasSignatureHelp; + Opts.CodeComplete.DocumentationFormat = Params.capabilities.CompletionDocumentationFormat; DiagOpts.EmbedFixesInDiagnostics = Params.capabilities.DiagnosticFixes; DiagOpts.SendDiagnosticCategory = Params.capabilities.DiagnosticCategory; @@ -622,14 +620,14 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, }}, {"typeHierarchyProvider", true}, }}}}; - if (NegotiatedOffsetEncoding) - Result["offsetEncoding"] = *NegotiatedOffsetEncoding; - if (ClangdServerOpts.TheiaSemanticHighlighting) + if (Opts.OffsetEncoding) + Result["offsetEncoding"] = *Opts.OffsetEncoding; + if (Opts.TheiaSemanticHighlighting) Result.getObject("capabilities") ->insert( {"semanticHighlighting", llvm::json::Object{{"scopes", buildHighlightScopeLookupTable()}}}); - if (ClangdServerOpts.FoldingRanges) + if (Opts.FoldingRanges) Result.getObject("capabilities")->insert({"foldingRangeProvider", true}); Reply(std::move(Result)); } @@ -788,7 +786,7 @@ void ClangdLSPServer::onWorkspaceSymbol( const WorkspaceSymbolParams &Params, Callback> Reply) { Server->workspaceSymbols( - Params.query, CCOpts.Limit, + Params.query, Opts.CodeComplete.Limit, [Reply = std::move(Reply), this](llvm::Expected> Items) mutable { if (!Items) @@ -803,7 +801,7 @@ void ClangdLSPServer::onWorkspaceSymbol( void ClangdLSPServer::onPrepareRename(const TextDocumentPositionParams &Params, Callback> Reply) { Server->prepareRename(Params.textDocument.uri.file(), Params.position, - RenameOpts, std::move(Reply)); + Opts.Rename, std::move(Reply)); } void ClangdLSPServer::onRename(const RenameParams &Params, @@ -813,7 +811,7 @@ void ClangdLSPServer::onRename(const RenameParams &Params, return Reply(llvm::make_error( "onRename called for non-added file", ErrorCode::InvalidParams)); Server->rename( - File, Params.position, Params.newName, RenameOpts, + File, Params.position, Params.newName, Opts.Rename, [File, Params, Reply = std::move(Reply), this](llvm::Expected Edits) mutable { if (!Edits) @@ -1030,7 +1028,8 @@ void ClangdLSPServer::onCompletion(const CompletionParams &Params, vlog("ignored auto-triggered completion, preceding char did not match"); return Reply(CompletionList()); } - Server->codeComplete(Params.textDocument.uri.file(), Params.position, CCOpts, + Server->codeComplete(Params.textDocument.uri.file(), Params.position, + Opts.CodeComplete, [Reply = std::move(Reply), this](llvm::Expected List) mutable { if (!List) @@ -1038,7 +1037,7 @@ void ClangdLSPServer::onCompletion(const CompletionParams &Params, CompletionList LSPList; LSPList.isIncomplete = List->HasMore; for (const auto &R : List->Completions) { - CompletionItem C = R.render(CCOpts); + CompletionItem C = R.render(Opts.CodeComplete); C.kind = adjustKindToCapability( C.kind, SupportedCompletionItemKinds); LSPList.items.push_back(std::move(C)); @@ -1224,7 +1223,7 @@ void ClangdLSPServer::onChangeConfiguration( void ClangdLSPServer::onReference(const ReferenceParams &Params, Callback> Reply) { Server->findReferences(Params.textDocument.uri.file(), Params.position, - CCOpts.Limit, + Opts.CodeComplete.Limit, [Reply = std::move(Reply)]( llvm::Expected Refs) mutable { if (!Refs) @@ -1340,20 +1339,13 @@ void ClangdLSPServer::onSemanticTokensDelta( }); } -ClangdLSPServer::ClangdLSPServer( - class Transport &Transp, const ThreadsafeFS &TFS, - const clangd::CodeCompleteOptions &CCOpts, - const clangd::RenameOptions &RenameOpts, - llvm::Optional CompileCommandsDir, bool UseDirBasedCDB, - llvm::Optional ForcedOffsetEncoding, - const ClangdServer::Options &Opts) +ClangdLSPServer::ClangdLSPServer(class Transport &Transp, + const ThreadsafeFS &TFS, + const ClangdLSPServer::Options &Opts) : BackgroundContext(Context::current().clone()), Transp(Transp), - MsgHandler(new MessageHandler(*this)), TFS(TFS), CCOpts(CCOpts), - RenameOpts(RenameOpts), SupportedSymbolKinds(defaultSymbolKinds()), - SupportedCompletionItemKinds(defaultCompletionItemKinds()), - UseDirBasedCDB(UseDirBasedCDB), - CompileCommandsDir(std::move(CompileCommandsDir)), ClangdServerOpts(Opts), - NegotiatedOffsetEncoding(ForcedOffsetEncoding) { + MsgHandler(new MessageHandler(*this)), TFS(TFS), + SupportedSymbolKinds(defaultSymbolKinds()), + SupportedCompletionItemKinds(defaultCompletionItemKinds()), Opts(Opts) { // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); MsgHandler->bind("initialized", &ClangdLSPServer::onInitialized); diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index ab34ea7be748f..3dc679c595105 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -36,17 +36,24 @@ class SymbolIndex; /// The server also supports $/cancelRequest (MessageHandler provides this). class ClangdLSPServer : private ClangdServer::Callbacks { public: - /// If \p CompileCommandsDir has a value, compile_commands.json will be - /// loaded only from \p CompileCommandsDir. Otherwise, clangd will look - /// for compile_commands.json in all parent directories of each file. - /// If UseDirBasedCDB is false, compile commands are not read from disk. - // FIXME: Clean up signature around CDBs. + struct Options : ClangdServer::Options { + /// Look for compilation databases, rather than using compile commands + /// set via LSP (extensions) only. + bool UseDirBasedCDB = true; + /// A fixed directory to search for a compilation database in. + /// If not set, we search upward from the source file. + llvm::Optional CompileCommandsDir; + /// The offset-encoding to use, or None to negotiate it over LSP. + llvm::Optional OffsetEncoding; + + /// Per-feature options. Generally ClangdServer lets these vary + /// per-request, but LSP allows limited/no customizations. + clangd::CodeCompleteOptions CodeComplete; + clangd::RenameOptions Rename; + }; + ClangdLSPServer(Transport &Transp, const ThreadsafeFS &TFS, - const clangd::CodeCompleteOptions &CCOpts, - const clangd::RenameOptions &RenameOpts, - llvm::Optional CompileCommandsDir, bool UseDirBasedCDB, - llvm::Optional ForcedOffsetEncoding, - const ClangdServer::Options &Opts); + const ClangdLSPServer::Options &Opts); /// The destructor blocks on any outstanding background tasks. ~ClangdLSPServer(); @@ -227,10 +234,6 @@ class ClangdLSPServer : private ClangdServer::Callbacks { } const ThreadsafeFS &TFS; - /// Options used for code completion - clangd::CodeCompleteOptions CCOpts; - /// Options used for rename. - clangd::RenameOptions RenameOpts; /// Options used for diagnostics. ClangdDiagnosticOptions DiagOpts; /// The supported kinds of the client. @@ -268,14 +271,11 @@ class ClangdLSPServer : private ClangdServer::Callbacks { // Store of the current versions of the open documents. DraftStore DraftMgr; + Options Opts; // The CDB is created by the "initialize" LSP method. - bool UseDirBasedCDB; // FIXME: make this a capability. - llvm::Optional CompileCommandsDir; // FIXME: merge with capability? std::unique_ptr BaseCDB; // CDB is BaseCDB plus any commands overridden via LSP extensions. llvm::Optional CDB; - ClangdServer::Options ClangdServerOpts; - llvm::Optional NegotiatedOffsetEncoding; // The ClangdServer is created by the "initialize" LSP method. llvm::Optional Server; }; diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 8e5d6cb97a327..60a6c267591cc 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -670,9 +670,11 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var if (auto EnvFlags = llvm::sys::Process::GetEnv(FlagsEnvVar)) log("{0}: {1}", FlagsEnvVar, *EnvFlags); + ClangdLSPServer::Options Opts; + Opts.UseDirBasedCDB = (CompileArgsFrom == FilesystemCompileArgs); + // If --compile-commands-dir arg was invoked, check value and override default // path. - llvm::Optional CompileCommandsDirPath; if (!CompileCommandsDir.empty()) { if (llvm::sys::fs::exists(CompileCommandsDir)) { // We support passing both relative and absolute paths to the @@ -686,7 +688,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var "will be ignored.", EC.message()); } else { - CompileCommandsDirPath = std::string(Path.str()); + Opts.CompileCommandsDir = std::string(Path.str()); } } else { elog("Path specified by --compile-commands-dir does not exist. The " @@ -694,7 +696,6 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var } } - ClangdServer::Options Opts; switch (PCHStorage) { case PCHStorageFlag::Memory: Opts.StorePreamblesInMemory = true; @@ -744,23 +745,22 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var Opts.PreserveRecoveryASTType = RecoveryASTType; Opts.FoldingRanges = FoldingRanges; - clangd::CodeCompleteOptions CCOpts; - CCOpts.IncludeIneligibleResults = IncludeIneligibleResults; - CCOpts.Limit = LimitResults; + Opts.CodeComplete.IncludeIneligibleResults = IncludeIneligibleResults; + Opts.CodeComplete.Limit = LimitResults; if (CompletionStyle.getNumOccurrences()) - CCOpts.BundleOverloads = CompletionStyle != Detailed; - CCOpts.ShowOrigins = ShowOrigins; - CCOpts.InsertIncludes = HeaderInsertion; + Opts.CodeComplete.BundleOverloads = CompletionStyle != Detailed; + Opts.CodeComplete.ShowOrigins = ShowOrigins; + Opts.CodeComplete.InsertIncludes = HeaderInsertion; if (!HeaderInsertionDecorators) { - CCOpts.IncludeIndicator.Insert.clear(); - CCOpts.IncludeIndicator.NoInsert.clear(); + Opts.CodeComplete.IncludeIndicator.Insert.clear(); + Opts.CodeComplete.IncludeIndicator.NoInsert.clear(); } - CCOpts.SpeculativeIndexRequest = Opts.StaticIndex; - CCOpts.EnableFunctionArgSnippets = EnableFunctionArgSnippets; - CCOpts.AllScopes = AllScopesCompletion; - CCOpts.RunParser = CodeCompletionParse; - CCOpts.RankingModel = RankingModel; - CCOpts.DecisionForestBase = DecisionForestBase; + Opts.CodeComplete.SpeculativeIndexRequest = Opts.StaticIndex; + Opts.CodeComplete.EnableFunctionArgSnippets = EnableFunctionArgSnippets; + Opts.CodeComplete.AllScopes = AllScopesCompletion; + Opts.CodeComplete.RunParser = CodeCompletionParse; + Opts.CodeComplete.RankingModel = RankingModel; + Opts.CodeComplete.DecisionForestBase = DecisionForestBase; RealThreadsafeFS TFS; std::vector> ProviderStack; @@ -819,13 +819,11 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var return llvm::is_contained(TweakList, T.id()); return true; }; - llvm::Optional OffsetEncodingFromFlag; if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding) - OffsetEncodingFromFlag = ForceOffsetEncoding; + Opts.OffsetEncoding = ForceOffsetEncoding; - clangd::RenameOptions RenameOpts; // Shall we allow to customize the file limit? - RenameOpts.AllowCrossFile = CrossFileRename; + Opts.Rename.AllowCrossFile = CrossFileRename; // Initialize and run ClangdLSPServer. // Change stdin to binary to not lose \r\n on windows. @@ -856,10 +854,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var std::move(*Mappings)); } - ClangdLSPServer LSPServer( - *TransportLayer, TFS, CCOpts, RenameOpts, CompileCommandsDirPath, - /*UseDirBasedCDB=*/CompileArgsFrom == FilesystemCompileArgs, - OffsetEncodingFromFlag, Opts); + ClangdLSPServer LSPServer(*TransportLayer, TFS, Opts); llvm::set_thread_name("clangd.main"); int ExitCode = LSPServer.run() ? 0 diff --git a/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp b/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp index eaa6035df5ea6..9bf45881dc478 100644 --- a/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp +++ b/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp @@ -8,11 +8,9 @@ #include "Annotations.h" #include "ClangdLSPServer.h" -#include "CodeComplete.h" #include "LSPClient.h" #include "Protocol.h" #include "TestFS.h" -#include "refactor/Rename.h" #include "support/Logger.h" #include "support/TestTracer.h" #include "llvm/ADT/StringRef.h" @@ -36,13 +34,14 @@ MATCHER_P(DiagMessage, M, "") { class LSPTest : public ::testing::Test, private clangd::Logger { protected: - LSPTest() : LogSession(*this) {} + LSPTest() : LogSession(*this) { + ClangdServer::Options &Base = Opts; + Base = ClangdServer::optsForTest(); + } LSPClient &start() { EXPECT_FALSE(Server.hasValue()) << "Already initialized"; - Server.emplace(Client.transport(), FS, CCOpts, RenameOpts, - /*CompileCommandsDir=*/llvm::None, /*UseDirBasedCDB=*/false, - /*ForcedOffsetEncoding=*/llvm::None, Opts); + Server.emplace(Client.transport(), FS, Opts); ServerThread.emplace([&] { EXPECT_TRUE(Server->run()); }); Client.call("initialize", llvm::json::Object{}); return Client; @@ -64,9 +63,7 @@ class LSPTest : public ::testing::Test, private clangd::Logger { } MockFS FS; - CodeCompleteOptions CCOpts; - RenameOptions RenameOpts; - ClangdServer::Options Opts = ClangdServer::optsForTest(); + ClangdLSPServer::Options Opts; private: // Color logs so we can distinguish them from test output. From 8392685c2b9f3c2025100dd25b6c6e5eae312d92 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 29 Sep 2020 16:28:50 +0200 Subject: [PATCH 124/544] [clangd] Mark code action as "preferred" if it's the sole quickfix action Differential Revision: https://reviews.llvm.org/D88489 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 14 ++++++++++++++ clang-tools-extra/clangd/Diagnostics.cpp | 2 ++ clang-tools-extra/clangd/Protocol.cpp | 2 ++ clang-tools-extra/clangd/Protocol.h | 7 +++++++ .../clangd/test/fixits-embed-in-diagnostic.test | 1 + 5 files changed, 26 insertions(+) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index a85736b948300..4d9c0a43d68df 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -1007,6 +1007,20 @@ void ClangdLSPServer::onCodeAction(const CodeActionParams &Params, for (const auto &T : *Tweaks) Actions.push_back(toCodeAction(T, File, Selection)); + // If there's exactly one quick-fix, call it "preferred". + // We never consider refactorings etc as preferred. + CodeAction *OnlyFix = nullptr; + for (auto &Action : Actions) { + if (Action.kind && *Action.kind == CodeAction::QUICKFIX_KIND) { + if (OnlyFix) { + OnlyFix->isPreferred = false; + break; + } + Action.isPreferred = true; + OnlyFix = &Action; + } + } + if (SupportsCodeAction) return Reply(llvm::json::Array(Actions)); std::vector Commands; diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp index afa72f9d40513..ca3d6c5f8ca98 100644 --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -411,6 +411,8 @@ void toLSPDiags( Main.codeActions.emplace(); for (const auto &Fix : D.Fixes) Main.codeActions->push_back(toCodeAction(Fix, File)); + if (Main.codeActions->size() == 1) + Main.codeActions->front().isPreferred = true; } if (Opts.SendDiagnosticCategory && !D.Category.empty()) Main.category = D.Category; diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index 00ac071f496a0..61a691f2048f0 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -740,6 +740,8 @@ llvm::json::Value toJSON(const CodeAction &CA) { CodeAction["kind"] = *CA.kind; if (CA.diagnostics) CodeAction["diagnostics"] = llvm::json::Array(*CA.diagnostics); + if (CA.isPreferred) + CodeAction["isPreferred"] = true; if (CA.edit) CodeAction["edit"] = *CA.edit; if (CA.command) diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 3f717e8467fc9..4ef94e6e01db9 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -952,6 +952,13 @@ struct CodeAction { /// The diagnostics that this code action resolves. llvm::Optional> diagnostics; + /// Marks this as a preferred action. Preferred actions are used by the + /// `auto fix` command and can be targeted by keybindings. + /// A quick fix should be marked preferred if it properly addresses the + /// underlying error. A refactoring should be marked preferred if it is the + /// most reasonable choice of actions to take. + bool isPreferred = false; + /// The workspace edit this code action performs. llvm::Optional edit; diff --git a/clang-tools-extra/clangd/test/fixits-embed-in-diagnostic.test b/clang-tools-extra/clangd/test/fixits-embed-in-diagnostic.test index cfa47210d7d8f..debe4dfa5e789 100644 --- a/clang-tools-extra/clangd/test/fixits-embed-in-diagnostic.test +++ b/clang-tools-extra/clangd/test/fixits-embed-in-diagnostic.test @@ -28,6 +28,7 @@ # CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: }, +# CHECK-NEXT: "isPreferred": true, # CHECK-NEXT: "kind": "quickfix", # CHECK-NEXT: "title": "change 'union' to 'struct'" # CHECK-NEXT: } From 779a8a028f53f16234b41e5252b805304788b989 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 09:36:57 +0100 Subject: [PATCH 125/544] [ARM][LowOverheadLoops] TryRemove helper. Make a helper function that wraps around RDA::isSafeToRemove and utilises the existing DCE IT block checks. --- llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 3 + llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 126 +++++++++++--------- 2 files changed, 71 insertions(+), 58 deletions(-) diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 63989bd2317f3..e94e547800a49 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -635,6 +635,9 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI, InstSet &Dead) const { Dead.insert(MI); auto IsDead = [this, &Dead](MachineInstr *Def, int PhysReg) { + if (mayHaveSideEffects(*Def)) + return false; + unsigned LiveDefs = 0; for (auto &MO : Def->operands()) { if (!isValidRegDef(MO)) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 72e772e7bb516..f5fbe26f9f782 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -505,8 +505,6 @@ namespace { void Expand(LowOverheadLoop &LoLoop); - void DCE(MachineInstr *MI, SmallPtrSetImpl &ToRemove); - void IterationCountDCE(LowOverheadLoop &LoLoop); }; } @@ -521,6 +519,72 @@ std::map BasicBlocks; + for (auto *Dead : Killed) + BasicBlocks.insert(Dead->getParent()); + + // Collect IT blocks in all affected basic blocks. + std::map> ITBlocks; + for (auto *MBB : BasicBlocks) { + for (auto &IT : *MBB) { + if (IT.getOpcode() != ARM::t2IT) + continue; + RDA.getReachingLocalUses(&IT, ARM::ITSTATE, ITBlocks[&IT]); + } + } + + // If we're removing all of the instructions within an IT block, then + // also remove the IT instruction. + SmallPtrSet ModifiedITs; + SmallPtrSet RemoveITs; + for (auto *Dead : Killed) { + if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) { + MachineInstr *IT = RDA.getMIOperand(Dead, *MO); + RemoveITs.insert(IT); + auto &CurrentBlock = ITBlocks[IT]; + CurrentBlock.erase(Dead); + if (CurrentBlock.empty()) + ModifiedITs.erase(IT); + else + ModifiedITs.insert(IT); + } + } + if (!ModifiedITs.empty()) + return false; + Killed.insert(RemoveITs.begin(), RemoveITs.end()); + return true; + }; + + SmallPtrSet Uses; + if (!RDA.isSafeToRemove(MI, Uses, Ignore)) + return false; + + if (WontCorruptITs(Uses, RDA)) { + ToRemove.insert(Uses.begin(), Uses.end()); + LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI + << " - can also remove:\n"; + for (auto *Use : Uses) + dbgs() << " - " << *Use); + + SmallPtrSet Killed; + RDA.collectKilledOperands(MI, Killed); + if (WontCorruptITs(Killed, RDA)) { + ToRemove.insert(Killed.begin(), Killed.end()); + LLVM_DEBUG(for (auto *Dead : Killed) + dbgs() << " - " << *Dead); + } + return true; + } + return false; +} + bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { if (!StartInsertPt) return false; @@ -669,7 +733,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { Ignore.insert(VCTPs.begin(), VCTPs.end()); - if (RDA.isSafeToRemove(Def, ElementChain, Ignore)) { + if (TryRemove(Def, RDA, ElementChain, Ignore)) { bool FoundSub = false; for (auto *MI : ElementChain) { @@ -683,10 +747,6 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { } else return false; } - - LLVM_DEBUG(dbgs() << "ARM Loops: Will remove element count chain:\n"; - for (auto *MI : ElementChain) - dbgs() << " - " << *MI); ToRemove.insert(ElementChain.begin(), ElementChain.end()); } } @@ -1300,52 +1360,6 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { MI->eraseFromParent(); } -void ARMLowOverheadLoops::DCE(MachineInstr *MI, - SmallPtrSetImpl &ToRemove) { - // Collect the dead code and the MBBs in which they reside. - SmallPtrSet Killed; - RDA->collectKilledOperands(MI, Killed); - SmallPtrSet BasicBlocks; - for (auto *Dead : Killed) - BasicBlocks.insert(Dead->getParent()); - - // Collect IT blocks in all affected basic blocks. - std::map> ITBlocks; - for (auto *MBB : BasicBlocks) { - for (auto &IT : *MBB) { - if (IT.getOpcode() != ARM::t2IT) - continue; - RDA->getReachingLocalUses(&IT, ARM::ITSTATE, ITBlocks[&IT]); - } - } - - // If we're removing all of the instructions within an IT block, then - // also remove the IT instruction. - SmallPtrSet ModifiedITs; - for (auto *Dead : Killed) { - if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) { - MachineInstr *IT = RDA->getMIOperand(Dead, *MO); - auto &CurrentBlock = ITBlocks[IT]; - CurrentBlock.erase(Dead); - if (CurrentBlock.empty()) - ModifiedITs.erase(IT); - else - ModifiedITs.insert(IT); - } - } - - // Delete the killed instructions only if we don't have any IT blocks that - // need to be modified because we need to fixup the mask. - // TODO: Handle cases where IT blocks are modified. - if (ModifiedITs.empty()) { - LLVM_DEBUG(dbgs() << "ARM Loops: Will remove iteration count:\n"; - for (auto *MI : Killed) - dbgs() << " - " << *MI); - ToRemove.insert(Killed.begin(), Killed.end()); - } else - LLVM_DEBUG(dbgs() << "ARM Loops: Would need to modify IT block(s).\n"); -} - // Perform dead code elimation on the loop iteration count setup expression. // If we are tail-predicating, the number of elements to be processed is the // operand of the VCTP instruction in the vector body, see getCount(), which is @@ -1385,11 +1399,7 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { // Collect and remove the users of iteration count. SmallPtrSet Killed = { LoLoop.Start, LoLoop.Dec, LoLoop.End, LoLoop.InsertPt }; - SmallPtrSet Remove; - if (RDA->isSafeToRemove(Def, Remove, Killed)) { - LoLoop.ToRemove.insert(Remove.begin(), Remove.end()); - DCE(Def, LoLoop.ToRemove); - } else + if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); } From cdda7822d6ce9cd6fe305e6fffedf3480d4bb769 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 30 Sep 2020 08:38:08 +0000 Subject: [PATCH 126/544] [MLIR][Standard] Add `atan2` to standard dialect Differential Revision: https://reviews.llvm.org/D88168 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 40 +++++++++++++++++++ mlir/test/Dialect/Standard/ops.mlir | 13 ++++++ 2 files changed, 53 insertions(+) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 43d47941d0ab4..352b7d8fd3d69 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -525,6 +525,46 @@ def AtanOp : FloatUnaryOp<"atan", []>{ }]; } +//===----------------------------------------------------------------------===// +// Atan2Op +//===----------------------------------------------------------------------===// + +def Atan2Op : FloatArithmeticOp<"atan2">{ + let summary = "2-argument arcus tangent of the given values"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `std.atan2` ssa-use `,` ssa-use `:` type + ``` + + The `atan2` operation takes two operands and returns one result, all of + which must be of the same type. This type may be a floating point scalar + type, a vector whose element type is a floating point type, or a floating + point tensor. + + The 2-argument arcus tangent `atan2(y, x)` returns the angle in the + Euclidian plane between the positive x-axis and the ray through the point + (x, y). It is a generalization of the 1-argument arcus tangent which + returns the angle on the basis of the ratio y/x. + + See also https://en.wikipedia.org/wiki/Atan2 + + Example: + + ```mlir + // Scalar variant. + %a = atan2 %b, %c : f32 + + // SIMD vector variant. + %f = atan2 %g, %h : vector<4xf32> + + // Tensor variant. + %x = atan2 %y, %z : tensor<4x?xf32> + ``` + }]; +} + //===----------------------------------------------------------------------===// // AtomicRMWOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Standard/ops.mlir b/mlir/test/Dialect/Standard/ops.mlir index a765acb9657b5..64474e391b81e 100644 --- a/mlir/test/Dialect/Standard/ops.mlir +++ b/mlir/test/Dialect/Standard/ops.mlir @@ -19,11 +19,13 @@ func @test_index_cast_tensor_reverse(%arg0 : tensor) -> tensor { return %0 : tensor } +// CHECK-LABEL: @assert func @assert(%arg : i1) { assert %arg, "Some message in case this assertion fails." return } +// CHECK-LABEL: @dynamic_tensor_from_elements func @dynamic_tensor_from_elements(%m : index, %n : index) -> tensor { %tnsr = dynamic_tensor_from_elements %m, %n { @@ -34,3 +36,14 @@ func @dynamic_tensor_from_elements(%m : index, %n : index) return %tnsr : tensor } +// CHECK-LABEL: @atan +func @atan(%arg : f32) -> f32 { + %result = atan %arg : f32 + return %result : f32 +} + +// CHECK-LABEL: @atan2 +func @atan2(%arg0 : f32, %arg1 : f32) -> f32 { + %result = atan2 %arg0, %arg1 : f32 + return %result : f32 +} From fdceec7aeac6ae0fba4db9703bf4e4e69a126d0d Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 29 Sep 2020 13:53:38 +0300 Subject: [PATCH 127/544] [llvm-readobj][ARM] - Improve support of printing unwind (-u) information for non-relocatable objects. This is the one more patch for https://bugs.llvm.org/show_bug.cgi?id=47581 It fixes how we print an information for the Generic model. With this patch we are able to read values from `.ARM.extab` and dump proper personality routines names/addresses. Differential revision: https://reviews.llvm.org/D88478 --- .../ELF/ARM/unwind-non-relocatable.test | 52 ++++++++++++++++-- llvm/tools/llvm-readobj/ARMEHABIPrinter.h | 53 +++++++++++++------ 2 files changed, 85 insertions(+), 20 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/ARM/unwind-non-relocatable.test b/llvm/test/tools/llvm-readobj/ELF/ARM/unwind-non-relocatable.test index 8740fa1b342d9..d4351d62a686a 100644 --- a/llvm/test/tools/llvm-readobj/ELF/ARM/unwind-non-relocatable.test +++ b/llvm/test/tools/llvm-readobj/ELF/ARM/unwind-non-relocatable.test @@ -59,6 +59,23 @@ # UNWIND-NEXT: 0xB0 ; finish # UNWIND-NEXT: ] # UNWIND-NEXT: } +# UNWIND-NEXT: Entry { +# UNWIND-NEXT: FunctionAddress: 0x25C +# UNWIND-NEXT: FunctionName: func5 +# UNWIND-NEXT: ExceptionHandlingTable: .ARM.extab +# UNWIND-NEXT: TableEntryAddress: 0xAABE44 +# UNWIND-NEXT: Model: Generic +# UNWIND-NEXT: PersonalityRoutineAddress: 0x33CCCF44 +# UNWIND-NEXT: PersonalityRoutineName: personality1 +# UNWIND-NEXT: } +# UNWIND-NEXT: Entry { +# UNWIND-NEXT: FunctionAddress: 0x25C +# UNWIND-NEXT: FunctionName: func5 +# UNWIND-NEXT: ExceptionHandlingTable: .ARM.extab +# UNWIND-NEXT: TableEntryAddress: 0xAABE48 +# UNWIND-NEXT: Model: Generic +# UNWIND-NEXT: PersonalityRoutineAddress: 0xFFFFFFFFF811138C +# UNWIND-NEXT: } # UNWIND-NEXT: ] # UNWIND-NEXT: } # UNWIND-NEXT: } @@ -77,21 +94,39 @@ Sections: Type: SHT_ARM_EXIDX Address: 0x24C Entries: -## Address of .ARM.exidx (0x24C) + entry offset (0) + 0x7fffffe4 (31 bit) == 0x230 (func1). +## A. Address of .ARM.exidx (0x24C) + entry offset (0) + 0x7fffffe4 (31 bit) == 0x230 (func1). - Offset: 0x7FFFFFE4 Value: 0x80B0B0B0 ## arbitrary opcodes. -## Address of .ARM.exidx (0x24C) + entry offset (8) + 0x7fffffe0 (31 bit) == 0x234 (func2). +## B. Address of .ARM.exidx (0x24C) + entry offset (8) + 0x7fffffe0 (31 bit) == 0x234 (func2). - Offset: 0x7FFFFFE0 Value: 0x809B8480 ## arbitrary opcodes. -## Address of .ARM.exidx (0x24C) + entry offset (16) + 0x7fffffec (31 bit) == 0x248 (func2). +## C. Address of .ARM.exidx (0x24C) + entry offset (16) + 0x7fffffec (31 bit) == 0x248 (func3). - Offset: 0x7FFFFFEC Value: 0x80B0B0B0 ## arbitrary opcodes. -## Address of .ARM.exidx (0x24C) + entry offset (24) + 0x7fffffe8 (31 bit) == 0x24C. +## D. Address of .ARM.exidx (0x24C) + entry offset (24) + 0x7fffffe8 (31 bit) == 0x24C. - Offset: 0x7FFFFFE8 Value: EXIDX_CANTUNWIND -## Address of .ARM.exidx (0x24C) + entry offset (32) + 0x3FFFFFFF (31 bit) == 0x4000026b. +## E. Address of .ARM.exidx (0x24C) + entry offset (32) + 0x3FFFFFFF (31 bit) == 0x4000026b (func4). - Offset: 0x3FFFFFFF Value: 0x80B0B0B0 ## arbitrary opcodes. +## F. Address of .ARM.exidx (0x24C) + entry offset (40) + 0x7FFFFFE8 (31 bit) == 0x25c (func5). + - Offset: 0x7FFFFFE8 +## Generic model. .ARM.exidx (0x24C) + entry offset (40 + 4) + 0x00AABBCC == +## 0x00AABE44 == address of entry [0] in the .ARM.extab section. +## 0x00AABE44 + 0x33221100 (31 bit, signed, .ARM.extab entry [0] value) == +## 0x33cccf44 == personality1 routine address. + Value: 0x00AABBCC +## G. Address of .ARM.exidx (0x24C) + entry offset (48) + 0x7FFFFFE0 (31 bit) == 0x25c (func5). + - Offset: 0x7FFFFFE0 +## Generic model. .ARM.exidx (0x24C) + entry offset (48 + 4) + 0x00AABBC8 == +## 0x00AABE48 == address of entry [1] in the .ARM.extab section. +## 0x00AABE48 + 0x77665544 (31 bit, signed, .ARM.extab entry [1] value) == +## 0xFFFFFFFFF811138C == the address of a personality routine function that does not exist. + Value: 0x00AABBC8 + - Name: .ARM.extab + Type: SHT_PROGBITS + Address: 0x00AABE44 + Content: "0011223344556677" Symbols: - Name: func1 Type: STT_FUNC @@ -109,3 +144,10 @@ Symbols: Type: STT_FUNC Section: .text Value: 0x4000026b + - Name: func5 + Type: STT_FUNC + Section: .text + Value: 0x25c + - Name: personality1 + Type: STT_FUNC + Value: 0x33cccf44 diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h index c91d57ca2626a..3d8acbf48fa93 100644 --- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h +++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h @@ -347,7 +347,7 @@ class PrinterContext { off_t IndexTableOffset) const; void PrintIndexTable(unsigned SectionIndex, const Elf_Shdr *IT) const; - void PrintExceptionTable(const Elf_Shdr *IT, const Elf_Shdr *EHT, + void PrintExceptionTable(const Elf_Shdr &EHT, uint64_t TableEntryOffset) const; void PrintOpcodes(const uint8_t *Entry, size_t Length, off_t Offset) const; @@ -434,11 +434,20 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, } template -void PrinterContext::PrintExceptionTable(const Elf_Shdr *IT, - const Elf_Shdr *EHT, +static const typename ET::Shdr * +findSectionContainingAddress(const object::ELFFile &Obj, StringRef FileName, + uint64_t Address) { + for (const typename ET::Shdr &Sec : unwrapOrError(FileName, Obj.sections())) + if (Address >= Sec.sh_addr && Address < Sec.sh_addr + Sec.sh_size) + return &Sec; + return nullptr; +} + +template +void PrinterContext::PrintExceptionTable(const Elf_Shdr &EHT, uint64_t TableEntryOffset) const { // TODO: handle failure. - Expected> Contents = ELF.getSectionContents(*EHT); + Expected> Contents = ELF.getSectionContents(EHT); if (!Contents) return; @@ -487,11 +496,14 @@ void PrinterContext::PrintExceptionTable(const Elf_Shdr *IT, } } else { SW.printString("Model", StringRef("Generic")); - - uint64_t Address = PREL31(Word, EHT->sh_addr); + const bool IsRelocatable = ELF.getHeader().e_type == ELF::ET_REL; + uint64_t Address = IsRelocatable + ? PREL31(Word, EHT.sh_addr) + : PREL31(Word, EHT.sh_addr + TableEntryOffset); SW.printHex("PersonalityRoutineAddress", Address); - if (ErrorOr Name = - FunctionAtAddress(Address, (unsigned)EHT->sh_link)) + Optional SecIndex = + IsRelocatable ? Optional(EHT.sh_link) : None; + if (ErrorOr Name = FunctionAtAddress(Address, SecIndex)) SW.printString("PersonalityRoutineName", *Name); } } @@ -580,19 +592,30 @@ void PrinterContext::PrintIndexTable(unsigned SectionIndex, PrintOpcodes(Contents->data() + Entry * IndexTableEntrySize + 4, 3, 1); } else { - const Elf_Shdr *EHT = - FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4); + const Elf_Shdr *EHT; + uint64_t TableEntryAddress; + if (IsRelocatable) { + TableEntryAddress = PREL31(Word1, IT->sh_addr); + EHT = FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4); + } else { + TableEntryAddress = + PREL31(Word1, IT->sh_addr + Entry * IndexTableEntrySize + 4); + EHT = findSectionContainingAddress(ELF, FileName, TableEntryAddress); + } if (EHT) // TODO: handle failure. if (Expected Name = ELF.getSectionName(*EHT)) SW.printString("ExceptionHandlingTable", *Name); - uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr); - SW.printHex("TableEntryOffset", TableEntryOffset); - - if (EHT) - PrintExceptionTable(IT, EHT, TableEntryOffset); + SW.printHex(IsRelocatable ? "TableEntryOffset" : "TableEntryAddress", + TableEntryAddress); + if (EHT) { + if (IsRelocatable) + PrintExceptionTable(*EHT, TableEntryAddress); + else + PrintExceptionTable(*EHT, TableEntryAddress - EHT->sh_addr); + } } } } From 0767a0b53e37009a70788c2a44834ed14a951cba Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 29 Sep 2020 17:06:08 +0300 Subject: [PATCH 128/544] [llvm-readobj][test] - Stop using few precompiled binaries in mips-got.test This removes 4 input files (one source file and 3 precompiled binaries) from `mips-got.test` (now YAMLs are used instead) and also makes the testing of the GNU output a bit stricter (`--strict-whitespace --match-full-lines`). Differential revision: https://reviews.llvm.org/D88488 --- .../ELF/Inputs/dynamic-table-exe.mips | Bin 6333 -> 0 bytes .../ELF/Inputs/dynamic-table-so.mips | Bin 5395 -> 0 bytes .../llvm-readobj/ELF/Inputs/dynamic-table.c | 12 - .../ELF/Inputs/got-tls.so.elf-mips64el | Bin 7398 -> 0 bytes .../test/tools/llvm-readobj/ELF/mips-got.test | 655 +++++++----------- 5 files changed, 249 insertions(+), 418 deletions(-) delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table-exe.mips delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table-so.mips delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table.c delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/got-tls.so.elf-mips64el diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table-exe.mips b/llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table-exe.mips deleted file mode 100644 index 28d8e33752c59b4067a16a43a1ef36f0f707833f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6333 zcmb7IZETxY6+X7p)MZ`kq-9G>M(xH+3TvE?byd4rVCSQ0M%reX2bxsrjlYt3_)E4k zH3Df?7oZYl>AImYRJ5=Gs>C3Ht*{^aur>+y1EdW$(SH;%L?wo_Nq{n_W}fHX_u6l> z7O^8A-}~Hi?z!il`|;j;9~zxF_+^vqkuIdI zD5q?nP$wdOR3>jgSqs<$jseG95ydwGlPN^-7q>LTC5C5Wkq`#ZN|8mJH%bZj_6;<`NqtF9py{~-pwYlS; zYx&Pp&kQZxdEv2}9{BDr{!E*+O*a1VnIJ6J=+)REHudq^(=e_W!UY@ab?eD&Nw21L6gwoN_ybuHS7LZiD)-mf2b zD4}0627bqmWt(i3JJ!i>17_?ENeDb$Xn$~yi~C)!8A#7 zlI$02lDm(wY+GWkqJ ziu0ASj7^LW4O@euem2H+pqYMf)o>ihBwr?jsp6KnhH@PJcnATTW{QBir87LYc&;aK z=}FI}`b#}Nd2yvvUReoNPcF4cM41UzpIq9s=4Xguy^X^lCtW^H#Rh)M#;CjGl}jGK zywpRvp&_(WJ%x6#Krw5>vT-pe@&zKjiqHt8);YIIa@EhKtV3%(M^I>SONA+Pxm_BLcgFRR3 zeoSM|E8Qn&-9@|Z!#=mXy2~vAUqXiWxWyyOnDYPI&Kro|9(ishD%--V-o)9xjHCQ~ z#Vvy zs~zy6V;Cd0aGGNP-JepQI&1am?bJEY7z=k0?LLimm*B?|+FnB2OK4Y(Nk7JfeVz24 zJbi;57mTNB7@6<4eFD?a6iF9JFbym+Vb)6Yb~w4^yx}{;AY4gW3%TPZ7{xXR6n{d&HB6l8taX_z~OW@ zTiEOWQlXShUF~mYIOctFDds)b(^c#A1WsJ?oUC;S{ra%pfqAhteBArNx40&bd)ICy zp0@EcF!u2}?U~eRTf7l0Ch*6nD)&)-+ntom#`ClO2h*i;rjYmF9=gLXGT9n|%IB+B zY(J=f0DKF07kEv{ky%GZF4*5eHD`T43O)^HeQsv#8}`qy?z-_7*$Yf!yXTR{wj7xL zw)=6Ftllyo|0a+!Q3T`Ec_U#O%*(V_9)N_l*+KHlLwNtU8v~#S{KvI%EjS4%F#Dnm?)!CvXINA^C&E)P%V{C%+DIL^e(BwA{o1=XO)#p!RfHG1Sn4>^FeHIk)BX6k1&I};M zIYXU;cwXh%wZkYOr5^E}U}I089rd{p#fpr*BS5gH+O5G}4fgQ)Q}ct0)Z<*?d4lH} z`uBo60@mklwn3sEndcUs=cxCtI|3G|B2OFaD|Pt6PJumO>hVmN1>jlG5OqmeFs6Kg zdOVw5gdXdgc3Dnd0P{S@JkNBOq31{5*)E`*0&fCSk7q(J95VKJR-}Kd^C*};Fg9HO z9S}IrWYi-)0XBM7=v5);XUA+(hZE$#2R3><6R)BJ`-~D&WA7PTkN)wz_YU;vmm#C~ zV-(OH_12)b20bIAuF0Q9-n6UtK#>R)Z1UUb1*FuY>)h)i2m<3^$k=-k1xD{M^bTJ~ z?=^?sB=jbsXKc`>$-nN<YY7&}5l%jvB&2L|xi;qu?$u`$0e0jMLuZxCDO2QDE#r?IXmyJ;`IY%KcMg-7@z{ zjkQ7UhZ^fB)*5(&D(C)2@~G9X_c4w2uEYIGV~)7qmo(PF;{KyCZ@8Ej*cV71wKMAd zMB~k>oco5xyfN!Lv&LH#=02b?KiGP`Yiy68eXjA?JB2x`cz7cQah;Mp_Dth?)R;G9 ze3u$n-@LiLNFLtEL0m^9kGe_gJGsVs#c*9{yhEj&{~GgF+a!CSESY>nyN|mkaS)1~&0> z;YHymz?bay+-UDr;LZ&Z{Bxe)C~qT8Iq=_r84njym+VyQ`gvf^JYp{d-qr|^uD{+t zT%Kgajwk&K0CT@5@fO_&?DX$0Vp~ps?g2K${vKb4jf^SfS<3DCpZIbBe(*nli*|nln149zVvmd} zeV^=*RxD|LKS}MfMK%LZ*zz{_HrWAe>bIeOH?SeDhW!q_8+a7h%-=Tm4w(XWzO%%E zjXfXi%>f&ty{`eA^}%(KW@|4+b+jd+Bhv-!d^eK5aH?#l1{@!BJ zu8xwv(TTofdA?7*LWK%4heJQl{Gp^IOOP)2RmoY&Y$1=6`!$LzD^;*&vxP)FYpHvK6`wyLxC|7t=}I~kx?}%fzffRh zQYVN8MO`+m)O;?tVEh?9I-=Y;G!;EEY8ga#OupHQx}>O9(%G3_yNmV>30J8^HqCxc zP8}N_9l396RK2}L$40GKbZBCfeP$IaUMj^GtaKg^ZT2JG)e;p(v1W@XRxfpkt)=cw zM!)%}Hy%4;HRBaKGR$bj=!x)xq#yajN~_zFZA3@D(YvM^U;HAG=9AyL$3?`uT(IWi z`4p_Le=?lWEv$c9yxQEl7fAMt5rKaUhK0+V?XZ58oGn_ZOqpS@Lo%c8kk;J~T2uB6 t8P3MbWlRstLf_4ZjZGXIicVO^4jsC8G}Z|I4DX5-17_yZRpHW}{1>~l&NTo4 diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table-so.mips b/llvm/test/tools/llvm-readobj/ELF/Inputs/dynamic-table-so.mips deleted file mode 100644 index ab36ceeb5a00197c3d4fdeade13337d93f21f180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5395 zcmb7IYiwLs5uV-aBo2gf?Kt3oq3d+9jYzK?H+@9-$HtGmY4ZbJ@5}b0eX)C8 zC4j`nsYE2KZb>RcL z_@mg3HnALLhT_y}-)Zh(Esz0~T=qk*DSSllf~e-TbBZ`|Ed}E!_U0xA9olyIk8#z%9T5 zEa*d^(^w1j+Hjm}yafeZ%SG@ApWFt15NHFbdlhT$0PTiwXVAYD6lxfoGHqa}%s4<# z8H9Su)P;Fv%3@xbJArnYKD^s#l zHdgpu&>9?{1ntDCegm3l&fg|iz_;{mlbzssMhIKwZ;jVLc}EB~=5s{#cVPSm`0)Hb z@ak_T{Cx(zLJWYY(@m#REV$KtHw(Mqd)v~xV;Ke;Sb=b{hE7{px)yr0<4v*$b z#q3lvmCu^7dyly3lSwK_=5t?$;GyT-V}XXze6m{2RwbWHrJZWYc|vANB`ME)Rao@0 zC*i?NCg~-`%^KpSs#W77M$pDUF(dmY4vvhvL(X8cHDnge7e)V>_r<(19OXIXIp(Xv zd&_f7uyZ*aYfFB|Hn7v@qw+@%h9HJBVRZBiUp(`2fA8uAYqfE;-SWdklATWTyrH4!}0&bMAi|Ferl0>0r@vLhjRaFOm|TyWtTVX>A%!4dxWzQ zhhMM4zdHP_!@oNGtHVD(W`l@1V|4L!D@J+-R@xU9c@8$h|6%BD6BAq7Uco-7J?>xI z#y$N%Xowx&!>B}uCr^*WsgHWP<=Rs3JFi&1_6ZrNomhOmJ>EYC+Pj(%JzHJ!_EPVi zWec+K@h$jzwO{v^eoE8Nh5D`FtS+~oSZp++@i=T~?Av9>R>ro`=!jowxBIC7Zo}?t zlkG3uZS3-&Ajq$Y#lX7rXJgI1Cs!@bwWT}Ga}@^qQ?9-D-h|Knq(5%%$0TA@4EDv} zf7sV^e;}LBmv+XUDOK_rwSnKe2HQ%te;uuvw>cub&o_hLfps!XR^M56P(Gbc7H4D6 zWh>QOsTh0A*&Q>#9T;A|8`$?{@#mAkhk<+fBRE@}mk56y+alsrPZhjm3e>9@I;SnZ zr^T6)IK`5ebuh_E&FAu&M{^l*GN+2kLM|=NT(Uanv(RIxR}o)0aBL0ZVyu$QbGkVG zBk%YRt#kO`@&H=&tKsmybK;|Tr3+IxWr%;YRsR^LwJIvvT-wygfgmT1vK;}Qx$J~xcy=sFp z{2@33G`|@p7i2zo8+{5UixBxdI&geS)VGYu#6Glxw8FQ;u{4nR9ox+Dh=XX;E_H$R zp@P56(J05p+#`-66&T z84}fZ9&MGIh757y9~IUb$YAwy&isz4pY?+27FU4aZyZK12#{&673Jz2)xUxpn1 zH$mrIMi2E;?gr#;+(+)`A-NUEt=z}%wU(UC_h%Jy3G^v2ezbiZ7}&M>-o&tp>Jw$P z{VTNTH~kxgTpV(`2k2_He~UKt4YCtJIsXUWu>KgwZvjaao`PIV8Bn#$oai4oe>(K+ zI=ialuMp5SW#=Fl1|4Iqz%?jP?mxv4KO?|2Y`2-VEMNT=Pd}L<-b_w?wWD# zq`9MpH^I^xfi(?*caC5Ml~=+{1JCM9nl+9U+z{Rsf+YwRfOmpm1yvc(zv0cv;W;-v zcLg<0lkfC-^SjCU<^=J45-ikM1fD~JCEI+ee=PZkHhpTXsc0zAm%Ig*Y;W#6d)rV$ zafRw{1Ut=oSN&xLJpf+)ji8ES&8P8-B3_5UxABn!kpBYcWQebVr~h0*1pY68?gq~- zBmW}k0DouzoNrcYc>4D;n!gC*MgM*TUgI4>yx#;L_U}6RkiI{FSD-#^lal{4sP1)`o!K;5fEnf}Ir@n81SNp8F&w;OD{gW7D&HXLT56+K`cz*|c%icD54ZQlx zn)?!X-Cx$+Z-Q5dq48_*YL7MepTLLp-Cidj!QIvS#+qEx6%VP=%#4#pp*%Wu^q4zw z@c0z#WOr;0cWAUZ)GCx+ui&OxDpy4-(`O zdq3@D_1~H}2$gF;OqdS>{7&H4{%-h_KIWvgQq@wUmP!#I#VtsUL`kdyRS*@j_(~-!Ys3fnL2{9*u2jfy&b{ZX zXYYe57kpfvRFMq~+nsY}Noa8+zIL(`ok(lG|GsS^X2SY99xL+Ny&{CfUGP9VvC zoF@8-UagIG-iWia`hpHrrU9I0xSe5& zA!~D|sBHA%9WHZ-z-;nA5CqTgmT$ zB}4vQrkj4>RP@cweU;6qb=VE>ho;|K>)81m(znIr;RU3BvN@A_{>aBk{I{uNv~MDA z8#F>ch5tKa=s!Ukt%0-hL9$FgLYwkJ$k5?oclhAI;9(bB zw>VyzaBF_9>br=RCQ3fIqX@t~HhjV@cvY`hs`*}Z_(Zl`nec{lBV|wX;yG@9I!6X_ z<t2DPzV)hobgrGoDLa?LGzzMCr)s!%SC;IV`{_bOW=IB|7jk5jY-9&y_LQ( z8~?YLO88k?@HgyJBkk7^$~U-8@sscm#clt&(z-r-ueIRYtV@jZ{?>cB72mxY{W|?$J>))ZRwx>?eF%m5a8xN%W-h;;MJ80+kXydrO!nJCdg>v-8Cs_*G~JKdkp+&K9?fTui|mVJEyE?t~0( z!3*JK&S4+I%gr1fTDF6W^?kt)>UJu zDuX%+9CxD!W9`Y}kutMP^MJB!4D(`)mcXR%izBgaAgY(G05jMJJm+>B$3edI%I>+R{(RE(=lR0dl-&aFO z(L7R~C?7Ul4|_0Q>D9|>ozU6}&^E3cT-8tUDsnJgO^zDCzduY4FyU7jA9j(>1+}XTlQ<$6d^om7#-Oy7Y`Dioq zOL?6P=8NVxayro8(NVNl;5n51M!yj{Yv~{F8LEjd6BoJxgXYbM_W{{2R{4G|@<-_) z$7b`nxXMsd@Bi04PyU-`N1#U{aXppIe%bCFs!Ub$UUkOKRtnyLH<2H+2TQ(Z_xJ7T zcl!3H_wV0rf6@7p(=X@vQUlG1uRiR27XKf`|ATZnDNam5f^It<32(UnH@Lx$kF~F8cDW z5%?kUqfN#ww4>O;rC1UDF}`FA>}5fTSL6gfjeL4z2rlmoff?4997+642HKG5Qx<)B zM+rR5g5qEFCI92dr!mnSQnOh=-~~-?{)!zT&>D>DOWmOfB`{VHzoI{5=*xa0&=x_4 z62Fk2$4=CL!1r5$gCfY#jGyk^q%UE<%lZOMc@zIJ>s!o_cZ}32%y>7{2G~2 zeK{AdvA)QO4HN$jGKePmlXu%9>x)0pH}OSekpDgrU?}<~on)yA{9~lA82q!!{_Q9c z`{w-r8EGGI4IwI;;tRbQulN;Ndh5&Bg+|b3eR}6co2gH@to^9mA{801_3*~<8SGOo zB>%F1(%uv2{}w4=d&n?u`qxtNzZPa$&Y`G;g%R6^UE*8#C+9}1d`GiB)k@z0vd_2D zH-hZnt@KUJI#4UU6J)<_rLR}-Xs;uXo&{ZP?{iIWuXkwrgCP49;nL#jDa5nzbtU_Y z$ZC2DWZ!6|cZ2K)t#qs;$afxLSXi%-?>9n5zdPFMIK;Q$6H&2d-@-~rT(eYlqm@JJ zse|e-Mx)O!5i?#|mI-`)o8#?O^!D*SiS#5^+2lM!RJG&yOZ0|(5KR9n)8&56WYFj2 zS{~#(m_>mE=I18oQ_Rh=9l4*29Xc=K#`_A=H)8!r_7{Qc<>wbDPx)Cf-UYN@C-6pS zuB&$xy*)pFK|19j?)?xK=N%>AzRo{T_ETWKQ<6AapV0H(&Orze^qwY*0b3E`#N?K$@seGCZxyJJt1H9 z@$ob|>g2JymmNNJ#vMF1G)%Qc_fQLVJlkZgRT*7>+|5&+u?A9JjF4QnP;rap%1Ey4 zs>hd`o0&1 | FileCheck %s -DOTHERNUM=0 -check-prefix=BASIC-LLVM +# RUN: llvm-readobj -A %t.got.so 2>&1 | FileCheck %s -DOTHERNUM=0 -check-prefix=BASIC-LLVM +# RUN: llvm-readobj -A %t.got.exe 2>&1 | FileCheck %s -DOTHERNUM=0 -check-prefix=BASIC-LLVM +# RUN: llvm-readelf -A %t.got.o 2>&1 | FileCheck %s -check-prefix=BASIC-GNU \ +# RUN: --implicit-check-not="Number of" --strict-whitespace --match-full-lines +# RUN: llvm-readelf -A %t.got.so 2>&1 | FileCheck %s -check-prefix=BASIC-GNU \ +# RUN: --implicit-check-not="Number of" --strict-whitespace --match-full-lines +# RUN: llvm-readelf -A %t.got.exe 2>&1 | FileCheck %s -check-prefix=BASIC-GNU \ +# RUN: --implicit-check-not="Number of" --strict-whitespace --match-full-lines + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: [[TYPE=ET_EXEC]] + Machine: EM_MIPS +Sections: + - Name: .got + Type: SHT_PROGBITS + Address: 0x1122 + ContentArray: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ## Lazy resolver. + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, ## Module pointer (GNU extension) + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, ## Local entry 1 + 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, ## Local entry 2 + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, ## Global entry 1 + 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB ] ## Global entry 2 + Size: [[SIZE=]] + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_MIPS_LOCAL_GOTNO + Value: 4 + - Tag: DT_MIPS_GOTSYM + Value: 1 + - Tag: DT_PLTGOT + Value: 0x1122 +DynamicSymbols: + - Name: foo + - Name: bar + Type: STT_FUNC + +# BASIC-LLVM: Primary GOT { +# BASIC-LLVM-NEXT: Canonical gp value: 0x9112 +# BASIC-LLVM-NEXT: Reserved entries [ +# BASIC-LLVM-NEXT: Entry { +# BASIC-LLVM-NEXT: Address: 0x1122 +# BASIC-LLVM-NEXT: Access: -32752 +# BASIC-LLVM-NEXT: Initial: 0x0 +# BASIC-LLVM-NEXT: Purpose: Lazy resolver +# BASIC-LLVM-NEXT: } +# BASIC-LLVM-NEXT: Entry { +# BASIC-LLVM-NEXT: Address: 0x112A +# BASIC-LLVM-NEXT: Access: -32744 +# BASIC-LLVM-NEXT: Initial: 0xFFFFFFFFFFFFFFFF +# BASIC-LLVM-NEXT: Purpose: Module pointer (GNU extension) +# BASIC-LLVM-NEXT: } +# BASIC-LLVM-NEXT: ] +# BASIC-LLVM-NEXT: Local entries [ +# BASIC-LLVM-NEXT: Entry { +# BASIC-LLVM-NEXT: Address: 0x1132 +# BASIC-LLVM-NEXT: Access: -32736 +# BASIC-LLVM-NEXT: Initial: 0x1111111111111111 +# BASIC-LLVM-NEXT: } +# BASIC-LLVM-NEXT: Entry { +# BASIC-LLVM-NEXT: Address: 0x113A +# BASIC-LLVM-NEXT: Access: -32728 +# BASIC-LLVM-NEXT: Initial: 0x2222222222222222 +# BASIC-LLVM-NEXT: } +# BASIC-LLVM-NEXT: ] +# BASIC-LLVM-NEXT: Global entries [ +# BASIC-LLVM-NEXT: Entry { +# BASIC-LLVM-NEXT: Address: 0x1142 +# BASIC-LLVM-NEXT: Access: -32720 +# BASIC-LLVM-NEXT: Initial: 0xAAAAAAAAAAAAAAAA +# BASIC-LLVM-NEXT: Value: 0x0 +# BASIC-LLVM-NEXT: Type: None (0x0) +# BASIC-LLVM-NEXT: Section: Undefined (0x0) +# BASIC-LLVM-NEXT: Name: foo (5) +# BASIC-LLVM-NEXT: } +# BASIC-LLVM-NEXT: Entry { +# BASIC-LLVM-NEXT: Address: 0x114A +# BASIC-LLVM-NEXT: Access: -32712 +# BASIC-LLVM-NEXT: Initial: 0xBBBBBBBBBBBBBBBB +# BASIC-LLVM-NEXT: Value: 0x0 +# BASIC-LLVM-NEXT: Type: Function (0x2) +# BASIC-LLVM-NEXT: Section: Undefined (0x0) +# BASIC-LLVM-NEXT: Name: bar (1) +# BASIC-LLVM-NEXT: } +# BASIC-LLVM-NEXT: ] +# BASIC-LLVM-NEXT: Number of TLS and multi-GOT entries: [[OTHERNUM]] +# BASIC-LLVM-NEXT: } + +# BASIC-GNU:Primary GOT: +# BASIC-GNU-NEXT: Canonical gp value: 0000000000009112 +# BASIC-GNU-EMPTY: +# BASIC-GNU-NEXT: Reserved entries: +# BASIC-GNU-NEXT: Address Access Initial Purpose +# BASIC-GNU-NEXT: 0000000000001122 -32752(gp) 0000000000000000 Lazy resolver +# BASIC-GNU-NEXT: 000000000000112a -32744(gp) ffffffffffffffff Module pointer (GNU extension) +# BASIC-GNU-EMPTY: +# BASIC-GNU-NEXT: Local entries: +# BASIC-GNU-NEXT: Address Access Initial +# BASIC-GNU-NEXT: 0000000000001132 -32736(gp) 1111111111111111 {{$}} +# BASIC-GNU-NEXT: 000000000000113a -32728(gp) 2222222222222222 {{$}} +# BASIC-GNU-EMPTY: +# BASIC-GNU-NEXT: Global entries: +# BASIC-GNU-NEXT: Address Access Initial Sym.Val. Type Ndx Name +# BASIC-GNU-NEXT: 0000000000001142 -32720(gp) aaaaaaaaaaaaaaaa 0000000000000000 NOTYPE UND foo +# BASIC-GNU-NEXT: 000000000000114a -32712(gp) bbbbbbbbbbbbbbbb 0000000000000000 FUNC UND bar +# BASIC-GNU-EMPTY: +# BASIC-GNU-OTHER-NEXT: Number of TLS and multi-GOT entries [[OTHERNUM]] + +## Check we are able to print the number of TLS and multi-GOT entries properly. + +# RUN: yaml2obj --docnum=1 %s -DSIZE=56 -o %t.other.entries1.exe +# RUN: yaml2obj --docnum=1 %s -DSIZE=64 -o %t.other.entries2.exe +# RUN: llvm-readobj -A %t.other.entries1.exe 2>&1 | FileCheck %s -DOTHERNUM=1 --check-prefix=BASIC-LLVM +# RUN: llvm-readobj -A %t.other.entries2.exe 2>&1 | FileCheck %s -DOTHERNUM=2 -check-prefix=BASIC-LLVM +# RUN: llvm-readelf -A %t.other.entries1.exe 2>&1 | FileCheck %s -DOTHERNUM=1 \ +# RUN: --check-prefixes=BASIC-GNU,BASIC-GNU-OTHER --strict-whitespace --match-full-lines +# RUN: llvm-readelf -A %t.other.entries2.exe 2>&1 | FileCheck %s -DOTHERNUM=2 \ +# RUN: --check-prefixes=BASIC-GNU,BASIC-GNU-OTHER --strict-whitespace --match-full-lines + +## Check how we dump 32-bit inputs. + +# RUN: yaml2obj --docnum=2 %s -o %t.got32.exe +# RUN: llvm-readobj -A %t.got32.exe 2>&1 | FileCheck %s -check-prefix=BASIC32-LLVM +# RUN: llvm-readelf -A %t.got32.exe 2>&1 | FileCheck %s -check-prefix=BASIC32-GNU \ +# RUN: --strict-whitespace --match-full-lines + +# BASIC32-LLVM: Primary GOT { +# BASIC32-LLVM-NEXT: Canonical gp value: 0x9112 +# BASIC32-LLVM-NEXT: Reserved entries [ +# BASIC32-LLVM-NEXT: Entry { +# BASIC32-LLVM-NEXT: Address: 0x1122 +# BASIC32-LLVM-NEXT: Access: -32752 +# BASIC32-LLVM-NEXT: Initial: 0x0 +# BASIC32-LLVM-NEXT: Purpose: Lazy resolver +# BASIC32-LLVM-NEXT: } +# BASIC32-LLVM-NEXT: Entry { +# BASIC32-LLVM-NEXT: Address: 0x1126 +# BASIC32-LLVM-NEXT: Access: -32748 +# BASIC32-LLVM-NEXT: Initial: 0xFFFFFFFF +# BASIC32-LLVM-NEXT: Purpose: Module pointer (GNU extension) +# BASIC32-LLVM-NEXT: } +# BASIC32-LLVM-NEXT: ] +# BASIC32-LLVM-NEXT: Local entries [ +# BASIC32-LLVM-NEXT: Entry { +# BASIC32-LLVM-NEXT: Address: 0x112A +# BASIC32-LLVM-NEXT: Access: -32744 +# BASIC32-LLVM-NEXT: Initial: 0x11111111 +# BASIC32-LLVM-NEXT: } +# BASIC32-LLVM-NEXT: Entry { +# BASIC32-LLVM-NEXT: Address: 0x112E +# BASIC32-LLVM-NEXT: Access: -32740 +# BASIC32-LLVM-NEXT: Initial: 0x22222222 +# BASIC32-LLVM-NEXT: } +# BASIC32-LLVM-NEXT: ] +# BASIC32-LLVM-NEXT: Global entries [ +# BASIC32-LLVM-NEXT: Entry { +# BASIC32-LLVM-NEXT: Address: 0x1132 +# BASIC32-LLVM-NEXT: Access: -32736 +# BASIC32-LLVM-NEXT: Initial: 0xAAAAAAAA +# BASIC32-LLVM-NEXT: Value: 0x0 +# BASIC32-LLVM-NEXT: Type: None (0x0) +# BASIC32-LLVM-NEXT: Section: Undefined (0x0) +# BASIC32-LLVM-NEXT: Name: foo (5) +# BASIC32-LLVM-NEXT: } +# BASIC32-LLVM-NEXT: Entry { +# BASIC32-LLVM-NEXT: Address: 0x1136 +# BASIC32-LLVM-NEXT: Access: -32732 +# BASIC32-LLVM-NEXT: Initial: 0xBBBBBBBB +# BASIC32-LLVM-NEXT: Value: 0x0 +# BASIC32-LLVM-NEXT: Type: Function (0x2) +# BASIC32-LLVM-NEXT: Section: Undefined (0x0) +# BASIC32-LLVM-NEXT: Name: bar (1) +# BASIC32-LLVM-NEXT: } +# BASIC32-LLVM-NEXT: ] +# BASIC32-LLVM-NEXT: Number of TLS and multi-GOT entries: 1 +# BASIC32-LLVM-NEXT: } + +# BASIC32-GNU:Primary GOT: +# BASIC32-GNU-NEXT: Canonical gp value: 00009112 +# BASIC32-GNU-EMPTY: +# BASIC32-GNU-NEXT: Reserved entries: +# BASIC32-GNU-NEXT: Address Access Initial Purpose +# BASIC32-GNU-NEXT: 00001122 -32752(gp) 00000000 Lazy resolver +# BASIC32-GNU-NEXT: 00001126 -32748(gp) ffffffff Module pointer (GNU extension) +# BASIC32-GNU-EMPTY: +# BASIC32-GNU-NEXT: Local entries: +# BASIC32-GNU-NEXT: Address Access Initial +# BASIC32-GNU-NEXT: 0000112a -32744(gp) 11111111 {{$}} +# BASIC32-GNU-NEXT: 0000112e -32740(gp) 22222222 {{$}} +# BASIC32-GNU-EMPTY: +# BASIC32-GNU-NEXT: Global entries: +# BASIC32-GNU-NEXT: Address Access Initial Sym.Val. Type Ndx Name +# BASIC32-GNU-NEXT: 00001132 -32736(gp) aaaaaaaa 00000000 NOTYPE UND foo +# BASIC32-GNU-NEXT: 00001136 -32732(gp) bbbbbbbb 00000000 FUNC UND bar +# BASIC32-GNU-EMPTY: +# BASIC32-GNU-NEXT: Number of TLS and multi-GOT entries 1 + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_MIPS +Sections: + - Name: .got + Type: SHT_PROGBITS + Address: 0x1122 + ContentArray: [ 0x00, 0x00, 0x00, 0x00, ## Lazy resolver. + 0xFF, 0xFF, 0xFF, 0xFF, ## Module pointer (GNU extension). + 0x11, 0x11, 0x11, 0x11, ## Local entry 1. + 0x22, 0x22, 0x22, 0x22, ## Local entry 2. + 0xAA, 0xAA, 0xAA, 0xAA, ## Global entry 1. + 0xBB, 0xBB, 0xBB, 0xBB, ## Global entry 2. + 0x00, 0x00, 0x00, 0x00 ] ## Other entry. + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_MIPS_LOCAL_GOTNO + Value: 4 + - Tag: DT_MIPS_GOTSYM + Value: 1 + - Tag: DT_PLTGOT + Value: 0x1122 +DynamicSymbols: + - Name: foo + - Name: bar + Type: STT_FUNC + # RUN: llvm-readobj -A %p/Inputs/got-empty.exe.mipsel | \ # RUN: FileCheck %s -check-prefix GOT-EMPTY # RUN: llvm-readobj -A %p/Inputs/got-static.exe.mips | \ # RUN: FileCheck %s -check-prefix GOT-STATIC -# RUN: llvm-readelf -A %p/Inputs/dynamic-table-exe.mips | \ -# RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-EXE -# RUN: llvm-readelf -A %p/Inputs/dynamic-table-so.mips | \ -# RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-SO -# RUN: llvm-readelf -A %p/Inputs/got-tls.so.elf-mips64el | \ -# RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-TLS # RUN: llvm-readelf -A %p/Inputs/got-empty.exe.mipsel | \ # RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-EMPTY # RUN: llvm-readelf -A %p/Inputs/got-static.exe.mips | \ # RUN: FileCheck %s --strict-whitespace -check-prefix GNU-GOT-STATIC -# GOT-EXE: Primary GOT { -# GOT-EXE-NEXT: Canonical gp value: 0x418880 -# GOT-EXE-NEXT: Reserved entries [ -# GOT-EXE-NEXT: Entry { -# GOT-EXE-NEXT: Address: 0x410890 -# GOT-EXE-NEXT: Access: -32752 -# GOT-EXE-NEXT: Initial: 0x0 -# GOT-EXE-NEXT: Purpose: Lazy resolver -# GOT-EXE-NEXT: } -# GOT-EXE-NEXT: Entry { -# GOT-EXE-NEXT: Address: 0x410894 -# GOT-EXE-NEXT: Access: -32748 -# GOT-EXE-NEXT: Initial: 0x80000000 -# GOT-EXE-NEXT: Purpose: Module pointer (GNU extension) -# GOT-EXE-NEXT: } -# GOT-EXE-NEXT: ] -# GOT-EXE-NEXT: Local entries [ -# GOT-EXE-NEXT: Entry { -# GOT-EXE-NEXT: Address: 0x410898 -# GOT-EXE-NEXT: Access: -32744 -# GOT-EXE-NEXT: Initial: 0x400418 -# GOT-EXE-NEXT: } -# GOT-EXE-NEXT: Entry { -# GOT-EXE-NEXT: Address: 0x41089C -# GOT-EXE-NEXT: Access: -32740 -# GOT-EXE-NEXT: Initial: 0x410840 -# GOT-EXE-NEXT: } -# GOT-EXE-NEXT: Entry { -# GOT-EXE-NEXT: Address: 0x4108A0 -# GOT-EXE-NEXT: Access: -32736 -# GOT-EXE-NEXT: Initial: 0x0 -# GOT-EXE-NEXT: } -# GOT-EXE-NEXT: ] -# GOT-EXE-NEXT: Global entries [ -# GOT-EXE-NEXT: Entry { -# GOT-EXE-NEXT: Address: 0x4108A4 -# GOT-EXE-NEXT: Access: -32732 -# GOT-EXE-NEXT: Initial: 0x0 -# GOT-EXE-NEXT: Value: 0x0 -# GOT-EXE-NEXT: Type: Function (0x2) -# GOT-EXE-NEXT: Section: Undefined (0x0) -# GOT-EXE-NEXT: Name: __gmon_start__ (1) -# GOT-EXE-NEXT: } -# GOT-EXE-NEXT: ] -# GOT-EXE-NEXT: Number of TLS and multi-GOT entries: 0 -# GOT-EXE-NEXT: } - -# GOT-SO: Primary GOT { -# GOT-SO-NEXT: Canonical gp value: 0x188D0 -# GOT-SO-NEXT: Reserved entries [ -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108E0 -# GOT-SO-NEXT: Access: -32752 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: Purpose: Lazy resolver -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108E4 -# GOT-SO-NEXT: Access: -32748 -# GOT-SO-NEXT: Initial: 0x80000000 -# GOT-SO-NEXT: Purpose: Module pointer (GNU extension) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: ] -# GOT-SO-NEXT: Local entries [ -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108E8 -# GOT-SO-NEXT: Access: -32744 -# GOT-SO-NEXT: Initial: 0x108E0 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108EC -# GOT-SO-NEXT: Access: -32740 -# GOT-SO-NEXT: Initial: 0x10000 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108F0 -# GOT-SO-NEXT: Access: -32736 -# GOT-SO-NEXT: Initial: 0x10920 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108F4 -# GOT-SO-NEXT: Access: -32732 -# GOT-SO-NEXT: Initial: 0x108CC -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108F8 -# GOT-SO-NEXT: Access: -32728 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x108FC -# GOT-SO-NEXT: Access: -32724 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x10900 -# GOT-SO-NEXT: Access: -32720 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x10904 -# GOT-SO-NEXT: Access: -32716 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: } -# GOT-SO-NEXT: ] -# GOT-SO-NEXT: Global entries [ -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x10908 -# GOT-SO-NEXT: Access: -32712 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: Value: 0x0 -# GOT-SO-NEXT: Type: None (0x0) -# GOT-SO-NEXT: Section: Undefined (0x0) -# GOT-SO-NEXT: Name: _ITM_registerTMCloneTable (87) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x1090C -# GOT-SO-NEXT: Access: -32708 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: Value: 0x0 -# GOT-SO-NEXT: Type: None (0x0) -# GOT-SO-NEXT: Section: Undefined (0x0) -# GOT-SO-NEXT: Name: _Jv_RegisterClasses (128) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x10910 -# GOT-SO-NEXT: Access: -32704 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: Value: 0x0 -# GOT-SO-NEXT: Type: Function (0x2) -# GOT-SO-NEXT: Section: Undefined (0x0) -# GOT-SO-NEXT: Name: __gmon_start__ (23) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x10914 -# GOT-SO-NEXT: Access: -32700 -# GOT-SO-NEXT: Initial: 0x840 -# GOT-SO-NEXT: Value: 0x840 -# GOT-SO-NEXT: Type: Function (0x2) -# GOT-SO-NEXT: Section: Undefined (0x0) -# GOT-SO-NEXT: Name: puts@GLIBC_2.0 (162) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x10918 -# GOT-SO-NEXT: Access: -32696 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: Value: 0x0 -# GOT-SO-NEXT: Type: None (0x0) -# GOT-SO-NEXT: Section: Undefined (0x0) -# GOT-SO-NEXT: Name: _ITM_deregisterTMCloneTable (59) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: Entry { -# GOT-SO-NEXT: Address: 0x1091C -# GOT-SO-NEXT: Access: -32692 -# GOT-SO-NEXT: Initial: 0x0 -# GOT-SO-NEXT: Value: 0x0 -# GOT-SO-NEXT: Type: Function (0x2) -# GOT-SO-NEXT: Section: Undefined (0x0) -# GOT-SO-NEXT: Name: __cxa_finalize@GLIBC_2.2 (113) -# GOT-SO-NEXT: } -# GOT-SO-NEXT: ] -# GOT-SO-NEXT: Number of TLS and multi-GOT entries: 0 -# GOT-SO-NEXT: } - -# GOT-TLS: Primary GOT { -# GOT-TLS-NEXT: Canonical gp value: 0x18BF0 -# GOT-TLS-NEXT: Reserved entries [ -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C00 -# GOT-TLS-NEXT: Access: -32752 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: Purpose: Lazy resolver -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C08 -# GOT-TLS-NEXT: Access: -32744 -# GOT-TLS-NEXT: Initial: 0x8000000000000000 -# GOT-TLS-NEXT: Purpose: Module pointer (GNU extension) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: ] -# GOT-TLS-NEXT: Local entries [ -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C10 -# GOT-TLS-NEXT: Access: -32736 -# GOT-TLS-NEXT: Initial: 0x10000 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C18 -# GOT-TLS-NEXT: Access: -32728 -# GOT-TLS-NEXT: Initial: 0x10C00 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C20 -# GOT-TLS-NEXT: Access: -32720 -# GOT-TLS-NEXT: Initial: 0x10CB8 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C28 -# GOT-TLS-NEXT: Access: -32712 -# GOT-TLS-NEXT: Initial: 0x10BF0 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C30 -# GOT-TLS-NEXT: Access: -32704 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C38 -# GOT-TLS-NEXT: Access: -32696 -# GOT-TLS-NEXT: Initial: 0x948 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C40 -# GOT-TLS-NEXT: Access: -32688 -# GOT-TLS-NEXT: Initial: 0xA20 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C48 -# GOT-TLS-NEXT: Access: -32680 -# GOT-TLS-NEXT: Initial: 0xAF0 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C50 -# GOT-TLS-NEXT: Access: -32672 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C58 -# GOT-TLS-NEXT: Access: -32664 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C60 -# GOT-TLS-NEXT: Access: -32656 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: ] -# GOT-TLS-NEXT: Global entries [ -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C68 -# GOT-TLS-NEXT: Access: -32648 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: Value: 0x0 -# GOT-TLS-NEXT: Type: None (0x0) -# GOT-TLS-NEXT: Section: Undefined (0x0) -# GOT-TLS-NEXT: Name: _ITM_registerTMCloneTable (78) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C70 -# GOT-TLS-NEXT: Access: -32640 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: Value: 0x0 -# GOT-TLS-NEXT: Type: None (0x0) -# GOT-TLS-NEXT: Section: Undefined (0x0) -# GOT-TLS-NEXT: Name: _Jv_RegisterClasses (119) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C78 -# GOT-TLS-NEXT: Access: -32632 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: Value: 0x0 -# GOT-TLS-NEXT: Type: Function (0x2) -# GOT-TLS-NEXT: Section: Undefined (0x0) -# GOT-TLS-NEXT: Name: __gmon_start__ (23) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C80 -# GOT-TLS-NEXT: Access: -32624 -# GOT-TLS-NEXT: Initial: 0xB60 -# GOT-TLS-NEXT: Value: 0xB60 -# GOT-TLS-NEXT: Type: Function (0x2) -# GOT-TLS-NEXT: Section: Undefined (0x0) -# GOT-TLS-NEXT: Name: __tls_get_addr@GLIBC_2.3 (150) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C88 -# GOT-TLS-NEXT: Access: -32616 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: Value: 0x0 -# GOT-TLS-NEXT: Type: None (0x0) -# GOT-TLS-NEXT: Section: Undefined (0x0) -# GOT-TLS-NEXT: Name: _ITM_deregisterTMCloneTable (50) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: Entry { -# GOT-TLS-NEXT: Address: 0x10C90 -# GOT-TLS-NEXT: Access: -32608 -# GOT-TLS-NEXT: Initial: 0x0 -# GOT-TLS-NEXT: Value: 0x0 -# GOT-TLS-NEXT: Type: Function (0x2) -# GOT-TLS-NEXT: Section: Undefined (0x0) -# GOT-TLS-NEXT: Name: __cxa_finalize@GLIBC_2.2 (104) -# GOT-TLS-NEXT: } -# GOT-TLS-NEXT: ] -# GOT-TLS-NEXT: Number of TLS and multi-GOT entries: 4 -# GOT-TLS-NEXT: } - # GOT-EMPTY: Primary GOT { # GOT-EMPTY-NEXT: Canonical gp value: 0x409FF0 # GOT-EMPTY-NEXT: Reserved entries [ @@ -374,95 +306,6 @@ # GOT-STATIC-NEXT: ] # GOT-STATIC-NEXT: } -# GNU-GOT-EXE: Primary GOT: -# GNU-GOT-EXE-NEXT: Canonical gp value: 00418880 - -# GNU-GOT-EXE: Reserved entries: -# GNU-GOT-EXE-NEXT: Address Access Initial Purpose -# GNU-GOT-EXE-NEXT: 00410890 -32752(gp) 00000000 Lazy resolver -# GNU-GOT-EXE-NEXT: 00410894 -32748(gp) 80000000 Module pointer (GNU extension) - -# GNU-GOT-EXE: Local entries: -# GNU-GOT-EXE-NEXT: Address Access Initial -# GNU-GOT-EXE-NEXT: 00410898 -32744(gp) 00400418 -# GNU-GOT-EXE-NEXT: 0041089c -32740(gp) 00410840 -# GNU-GOT-EXE-NEXT: 004108a0 -32736(gp) 00000000 - -# GNU-GOT-EXE: Global entries: -# GNU-GOT-EXE-NEXT: Address Access Initial Sym.Val. Type Ndx Name -# GNU-GOT-EXE-NEXT: 004108a4 -32732(gp) 00000000 00000000 FUNC UND __gmon_start__ - -# GNU-GOT-EXE: PLT GOT: - -# GNU-GOT-EXE: Reserved entries: -# GNU-GOT-EXE-NEXT: Address Initial Purpose -# GNU-GOT-EXE-NEXT: 00410854 00000000 PLT lazy resolver -# GNU-GOT-EXE-NEXT: 00410858 00000000 Module pointer - -# GNU-GOT-EXE: Entries: -# GNU-GOT-EXE-NEXT: Address Initial Sym.Val. Type Ndx Name -# GNU-GOT-EXE-NEXT: 0041085c 00400800 00000000 FUNC UND puts -# GNU-GOT-EXE-NEXT: 00410860 00400800 00000000 FUNC UND __libc_start_main - -# GNU-GOT-SO: Primary GOT: -# GNU-GOT-SO-NEXT: Canonical gp value: 000188d0 - -# GNU-GOT-SO: Reserved entries: -# GNU-GOT-SO-NEXT: Address Access Initial Purpose -# GNU-GOT-SO-NEXT: 000108e0 -32752(gp) 00000000 Lazy resolver -# GNU-GOT-SO-NEXT: 000108e4 -32748(gp) 80000000 Module pointer (GNU extension) - -# GNU-GOT-SO: Local entries: -# GNU-GOT-SO-NEXT: Address Access Initial -# GNU-GOT-SO-NEXT: 000108e8 -32744(gp) 000108e0 -# GNU-GOT-SO-NEXT: 000108ec -32740(gp) 00010000 -# GNU-GOT-SO-NEXT: 000108f0 -32736(gp) 00010920 -# GNU-GOT-SO-NEXT: 000108f4 -32732(gp) 000108cc -# GNU-GOT-SO-NEXT: 000108f8 -32728(gp) 00000000 -# GNU-GOT-SO-NEXT: 000108fc -32724(gp) 00000000 -# GNU-GOT-SO-NEXT: 00010900 -32720(gp) 00000000 -# GNU-GOT-SO-NEXT: 00010904 -32716(gp) 00000000 - -# GNU-GOT-SO: Global entries: -# GNU-GOT-SO-NEXT: Address Access Initial Sym.Val. Type Ndx Name -# GNU-GOT-SO-NEXT: 00010908 -32712(gp) 00000000 00000000 NOTYPE UND _ITM_registerTMCloneTable -# GNU-GOT-SO-NEXT: 0001090c -32708(gp) 00000000 00000000 NOTYPE UND _Jv_RegisterClasses -# GNU-GOT-SO-NEXT: 00010910 -32704(gp) 00000000 00000000 FUNC UND __gmon_start__ -# GNU-GOT-SO-NEXT: 00010914 -32700(gp) 00000840 00000840 FUNC UND puts -# GNU-GOT-SO-NEXT: 00010918 -32696(gp) 00000000 00000000 NOTYPE UND _ITM_deregisterTMCloneTable -# GNU-GOT-SO-NEXT: 0001091c -32692(gp) 00000000 00000000 FUNC UND __cxa_finalize - -# GNU-GOT-TLS: Primary GOT: -# GNU-GOT-TLS-NEXT: Canonical gp value: 0000000000018bf0 - -# GNU-GOT-TLS: Reserved entries: -# GNU-GOT-TLS-NEXT: Address Access Initial Purpose -# GNU-GOT-TLS-NEXT: 0000000000010c00 -32752(gp) 0000000000000000 Lazy resolver -# GNU-GOT-TLS-NEXT: 0000000000010c08 -32744(gp) 8000000000000000 Module pointer (GNU extension) - -# GNU-GOT-TLS: Local entries: -# GNU-GOT-TLS-NEXT: Address Access Initial -# GNU-GOT-TLS-NEXT: 0000000000010c10 -32736(gp) 0000000000010000 -# GNU-GOT-TLS-NEXT: 0000000000010c18 -32728(gp) 0000000000010c00 -# GNU-GOT-TLS-NEXT: 0000000000010c20 -32720(gp) 0000000000010cb8 -# GNU-GOT-TLS-NEXT: 0000000000010c28 -32712(gp) 0000000000010bf0 -# GNU-GOT-TLS-NEXT: 0000000000010c30 -32704(gp) 0000000000000000 -# GNU-GOT-TLS-NEXT: 0000000000010c38 -32696(gp) 0000000000000948 -# GNU-GOT-TLS-NEXT: 0000000000010c40 -32688(gp) 0000000000000a20 -# GNU-GOT-TLS-NEXT: 0000000000010c48 -32680(gp) 0000000000000af0 -# GNU-GOT-TLS-NEXT: 0000000000010c50 -32672(gp) 0000000000000000 -# GNU-GOT-TLS-NEXT: 0000000000010c58 -32664(gp) 0000000000000000 -# GNU-GOT-TLS-NEXT: 0000000000010c60 -32656(gp) 0000000000000000 - -# GNU-GOT-TLS: Global entries: -# GNU-GOT-TLS-NEXT: Address Access Initial Sym.Val. Type Ndx Name -# GNU-GOT-TLS-NEXT: 0000000000010c68 -32648(gp) 0000000000000000 0000000000000000 NOTYPE UND _ITM_registerTMCloneTable -# GNU-GOT-TLS-NEXT: 0000000000010c70 -32640(gp) 0000000000000000 0000000000000000 NOTYPE UND _Jv_RegisterClasses -# GNU-GOT-TLS-NEXT: 0000000000010c78 -32632(gp) 0000000000000000 0000000000000000 FUNC UND __gmon_start__ -# GNU-GOT-TLS-NEXT: 0000000000010c80 -32624(gp) 0000000000000b60 0000000000000b60 FUNC UND __tls_get_addr -# GNU-GOT-TLS-NEXT: 0000000000010c88 -32616(gp) 0000000000000000 0000000000000000 NOTYPE UND _ITM_deregisterTMCloneTable -# GNU-GOT-TLS-NEXT: 0000000000010c90 -32608(gp) 0000000000000000 0000000000000000 FUNC UND __cxa_finalize - # GNU-GOTY : Primary GOT: # GNU-GOT-EMPTY: Canonical gp value: 00409ff0 @@ -487,17 +330,17 @@ ## Check we report errors when dynamic tags, needed for dumping GOT, are missing. -# RUN: yaml2obj --docnum=1 -DTAG1=DT_MIPS_LOCAL_GOTNO -DTAG2=DT_MIPS_GOTSYM %s -o %t.err1.o +# RUN: yaml2obj --docnum=3 -DTAG1=DT_MIPS_LOCAL_GOTNO -DTAG2=DT_MIPS_GOTSYM %s -o %t.err1.o # RUN: not llvm-readobj -A %t.err1.o 2>&1 | FileCheck %s -DFILE=%t.err1.o -check-prefix ERR1 # ERR1: error: '[[FILE]]': cannot find PLTGOT dynamic tag -# RUN: yaml2obj --docnum=1 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_GOTSYM %s -o %t.err2.o +# RUN: yaml2obj --docnum=3 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_GOTSYM %s -o %t.err2.o # RUN: not llvm-readobj -A %t.err2.o 2>&1 | FileCheck %s -DFILE=%t.err2.o -check-prefix ERR2 # ERR2: error: '[[FILE]]': cannot find MIPS_LOCAL_GOTNO dynamic tag -# RUN: yaml2obj --docnum=1 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_LOCAL_GOTNO %s -o %t.err3.o +# RUN: yaml2obj --docnum=3 -DTAG1=DT_PLTGOT -DTAG2=DT_MIPS_LOCAL_GOTNO %s -o %t.err3.o # RUN: not llvm-readobj -A %t.err3.o 2>&1 | FileCheck %s -DFILE=%t.err3.o -check-prefix ERR3 # ERR3: error: '[[FILE]]': cannot find MIPS_GOTSYM dynamic tag @@ -520,12 +363,12 @@ Sections: Value: 0 DynamicSymbols: [] -# RUN: yaml2obj --docnum=2 -DVAL1=0xffff %s -o %t.err4.o +# RUN: yaml2obj --docnum=4 -DVAL1=0xffff %s -o %t.err4.o # RUN: not llvm-readobj -A %t.err4.o 2>&1 | FileCheck %s -DFILE=%t.err4.o -check-prefix=ERR4 # ERR4: error: '[[FILE]]': DT_MIPS_GOTSYM value (65535) exceeds the number of dynamic symbols (1) -# RUN: yaml2obj --docnum=2 -DVAL2=0xffff %s -o %t.err5.o +# RUN: yaml2obj --docnum=4 -DVAL2=0xffff %s -o %t.err5.o # RUN: not llvm-readobj -A %t.err5.o 2>&1 | FileCheck %s -DFILE=%t.err5.o -check-prefix=ERR5 # ERR5: error: '[[FILE]]': there is no non-empty GOT section at 0xffff @@ -549,7 +392,7 @@ Sections: DynamicSymbols: [] ## Check that we do not report a warning about the .got section when we are able to locate it by name. -# RUN: yaml2obj --docnum=3 -DNAME=0xffff %s -o %t.err6.o +# RUN: yaml2obj --docnum=5 -DNAME=0xffff %s -o %t.err6.o # RUN: llvm-readobj -A %t.err6.o 2>&1 | \ # RUN: FileCheck %s -DFILE=%t.err6.o -check-prefix=NAME-ERR-FOUND --implicit-check-not=warning: # RUN: llvm-readelf -A %t.err6.o 2>&1 | \ @@ -574,7 +417,7 @@ Sections: ShName: [[NAME=]] ## Check we report a warning when we are unable to find the .got section due to an error. -# RUN: yaml2obj --docnum=3 -DGOTNAME=0xffff %s -o %t.err7.o +# RUN: yaml2obj --docnum=5 -DGOTNAME=0xffff %s -o %t.err7.o # RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: # RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: @@ -584,7 +427,7 @@ Sections: ## sections with the same address as the .got section. ## In this test the empty .data section has the same address as the .got section. -# RUN: yaml2obj --docnum=4 %s -o %t.err7.o +# RUN: yaml2obj --docnum=6 %s -o %t.err7.o # RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-LLVM # RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-GNU @@ -653,7 +496,7 @@ DynamicSymbols: - Name: foo ## Check how we print global GOT entries when they are unnamed section symbols. -# RUN: yaml2obj --docnum=5 %s -o %t.err8.o +# RUN: yaml2obj --docnum=7 %s -o %t.err8.o # RUN: llvm-readobj -A %t.err8.o 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-LLVM # RUN: llvm-readelf -A %t.err8.o 2>&1 | FileCheck %s -DFILE=%t.err8.o --check-prefix=SEC-SYMS-GNU From 05659606a2af76710fb19a65fbd1a6c88ba12dad Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 30 Sep 2020 09:43:29 +0100 Subject: [PATCH 129/544] Revert "[gardening] Replace some uses of setDebugLoc(DebugLoc()) with dropLocation(), NFC" Some of the buildbots have croaked with this patch, for examples failures that begin in this build: http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux/builds/29933 This reverts commit 674f57870f4c8a7fd7b629bffc85b149cbefd3e0. --- llvm/lib/Transforms/Scalar/LICM.cpp | 2 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index bc581e7ad40f3..631fa2f27c5b3 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2159,7 +2159,7 @@ bool llvm::promoteLoopAccessesToScalars( if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); - PreheaderLoad->dropLocation(); + PreheaderLoad->setDebugLoc(DebugLoc()); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 1672293380d7b..124a7c423e72c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2218,7 +2218,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // be misleading while debugging. for (auto &I : *ThenBB) { if (!SpeculatedStoreValue || &I != SpeculatedStore) - I.dropLocation(); + I.setDebugLoc(DebugLoc()); I.dropUnknownNonDebugMetadata(); } @@ -2878,7 +2878,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // When we fold the bonus instructions we want to make sure we // reset their debug locations in order to avoid stepping on dead // code caused by folding dead branches. - NewBonusInst->dropLocation(); + NewBonusInst->setDebugLoc(DebugLoc()); RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); @@ -2902,7 +2902,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // Reset the condition debug location to avoid jumping on dead code // as the result of folding dead branches. - CondInPred->dropLocation(); + CondInPred->setDebugLoc(DebugLoc()); RemapInstruction(CondInPred, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); From 6342b38c5fee74df94d7b0c34e5a93b9b22763df Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 30 Sep 2020 10:56:43 +0200 Subject: [PATCH 130/544] [clangd] Fix member/type name conflict caught by buildbots. --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 16 ++++++++-------- clang-tools-extra/clangd/ClangdLSPServer.h | 2 +- clang-tools-extra/clangd/tool/ClangdMain.cpp | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 4d9c0a43d68df..dfd26ad40b89c 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -395,7 +395,7 @@ class ClangdLSPServer::MessageHandler : public Transport::MessageHandler { Context handlerContext() const { return Context::current().derive( kCurrentOffsetEncoding, - Server.Opts.OffsetEncoding.getValueOr(OffsetEncoding::UTF16)); + Server.Opts.Encoding.getValueOr(OffsetEncoding::UTF16)); } // We run cancelable requests in a context that does two things: @@ -465,11 +465,11 @@ static std::vector semanticTokenTypes() { void ClangdLSPServer::onInitialize(const InitializeParams &Params, Callback Reply) { // Determine character encoding first as it affects constructed ClangdServer. - if (Params.capabilities.offsetEncoding && !Opts.OffsetEncoding) { - Opts.OffsetEncoding = OffsetEncoding::UTF16; // fallback + if (Params.capabilities.offsetEncoding && !Opts.Encoding) { + Opts.Encoding = OffsetEncoding::UTF16; // fallback for (OffsetEncoding Supported : *Params.capabilities.offsetEncoding) if (Supported != OffsetEncoding::UnsupportedEncoding) { - Opts.OffsetEncoding = Supported; + Opts.Encoding = Supported; break; } } @@ -509,8 +509,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, // Server, CDB, etc. WithContext MainContext(BackgroundContext.clone()); llvm::Optional WithOffsetEncoding; - if (Opts.OffsetEncoding) - WithOffsetEncoding.emplace(kCurrentOffsetEncoding, *Opts.OffsetEncoding); + if (Opts.Encoding) + WithOffsetEncoding.emplace(kCurrentOffsetEncoding, *Opts.Encoding); Server.emplace(*CDB, TFS, Opts, static_cast(this)); } @@ -620,8 +620,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, }}, {"typeHierarchyProvider", true}, }}}}; - if (Opts.OffsetEncoding) - Result["offsetEncoding"] = *Opts.OffsetEncoding; + if (Opts.Encoding) + Result["offsetEncoding"] = *Opts.Encoding; if (Opts.TheiaSemanticHighlighting) Result.getObject("capabilities") ->insert( diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index 3dc679c595105..e8823d37c55d3 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -44,7 +44,7 @@ class ClangdLSPServer : private ClangdServer::Callbacks { /// If not set, we search upward from the source file. llvm::Optional CompileCommandsDir; /// The offset-encoding to use, or None to negotiate it over LSP. - llvm::Optional OffsetEncoding; + llvm::Optional Encoding; /// Per-feature options. Generally ClangdServer lets these vary /// per-request, but LSP allows limited/no customizations. diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 60a6c267591cc..a897a9a3531d2 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -820,7 +820,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var return true; }; if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding) - Opts.OffsetEncoding = ForceOffsetEncoding; + Opts.Encoding = ForceOffsetEncoding; // Shall we allow to customize the file limit? Opts.Rename.AllowCrossFile = CrossFileRename; From d99f46c6eb8debaa1a14c122956177dc2a40ef9b Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 30 Sep 2020 11:02:05 +0200 Subject: [PATCH 131/544] [clangd] Fix fuzzer build after 7ba0779fbb41b6fa8 --- clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp b/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp index e620342499bf7..41c603b5fd21a 100644 --- a/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp +++ b/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp @@ -14,8 +14,6 @@ #include "ClangdLSPServer.h" #include "ClangdServer.h" -#include "CodeComplete.h" -#include "refactor/Rename.h" #include "support/ThreadsafeFS.h" #include #include @@ -33,12 +31,12 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { /*Style=*/JSONStreamStyle::Delimited); RealThreadsafeFS FS; CodeCompleteOptions CCOpts; - CCOpts.EnableSnippets = false; - ClangdServer::Options Opts; + ClangdLSPServer::Options Opts; + Opts.CodeComplete.EnableSnippets = false; + Opts.UseDirBasedCDB = false; // Initialize and run ClangdLSPServer. - ClangdLSPServer LSPServer(*Transport, FS, CCOpts, RenameOptions(), llvm::None, - false, llvm::None, Opts); + ClangdLSPServer LSPServer(*Transport, FS, Opts); LSPServer.run(); return 0; } From 64e8fd540ecc38ee3daf942499091589785e2733 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 29 Sep 2020 20:15:03 +0200 Subject: [PATCH 132/544] [clangd][remote] Make sure relative paths are absolute with respect to posix style Relative paths received from the server are always in posix style. So we need to ensure they are relative using that style, and not the native one. Differential Revision: https://reviews.llvm.org/D88507 --- .../clangd/index/remote/marshalling/Marshalling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp index 31ce4a44ea55a..d61848f295a3f 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp @@ -299,7 +299,7 @@ Marshaller::relativePathToURI(llvm::StringRef RelativePath) { assert(RelativePath == llvm::sys::path::convert_to_slash(RelativePath)); if (RelativePath.empty()) return error("Empty relative path."); - if (llvm::sys::path::is_absolute(RelativePath)) + if (llvm::sys::path::is_absolute(RelativePath, llvm::sys::path::Style::posix)) return error("RelativePath '{0}' is absolute.", RelativePath); llvm::SmallString<256> FullPath = llvm::StringRef(*LocalIndexRoot); llvm::sys::path::append(FullPath, RelativePath); From 0249df33fec16b728e2d33cae02f5da4c9f74e38 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 29 Sep 2020 16:35:42 +0200 Subject: [PATCH 133/544] [AMDGPU] Do not generate mul with 1 in AMDGPU Atomic Optimizer Check if operand of mul is constant value of one for certain atomic instructions in order to avoid making unnecessary instructions when -amdgpu-atomic-optimizer is present. Differential Revision: https://reviews.llvm.org/D88315 --- .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 13 +- .../atomic_optimizations_mul_one.ll | 297 ++++++++++++++++++ 2 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index c9d25d4250d55..1215d9d0550ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -404,6 +404,11 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op, } } +static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) { + const ConstantInt *CI = dyn_cast(LHS); + return (CI && CI->isOne()) ? RHS : B.CreateMul(LHS, RHS); +} + void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx, @@ -523,7 +528,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // old value times the number of active lanes. Value *const Ctpop = B.CreateIntCast( B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false); - NewV = B.CreateMul(V, Ctpop); + NewV = buildMul(B, V, Ctpop); break; } @@ -543,7 +548,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // old value times the parity of the number of active lanes. Value *const Ctpop = B.CreateIntCast( B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false); - NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1)); + NewV = buildMul(B, V, B.CreateAnd(Ctpop, 1)); break; } } @@ -622,7 +627,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, llvm_unreachable("Unhandled atomic op"); case AtomicRMWInst::Add: case AtomicRMWInst::Sub: - LaneOffset = B.CreateMul(V, Mbcnt); + LaneOffset = buildMul(B, V, Mbcnt); break; case AtomicRMWInst::And: case AtomicRMWInst::Or: @@ -633,7 +638,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, LaneOffset = B.CreateSelect(Cond, Identity, V); break; case AtomicRMWInst::Xor: - LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1)); + LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1)); break; } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll new file mode 100644 index 0000000000000..98db0dccc9867 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll @@ -0,0 +1,297 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer -verify-machineinstrs %s | FileCheck -check-prefix=IR %s +; RUN: llc -global-isel -mtriple=amdgcn-- -amdgpu-atomic-optimizations -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg) +declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg) +declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg) +declare void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32 immarg) + +define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg) { +; IR-LABEL: @atomic_add( +; IR-NEXT: .entry: +; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) +; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]]) +; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0 +; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] +; IR: 9: +; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0) +; IR-NEXT: br label [[TMP11]] +; IR: 11: +; IR-NEXT: ret void +; +; GCN-LABEL: atomic_add: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0 +; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_cbranch_execz BB0_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: buffer_atomic_add v0, v1, s[0:3], 0 idxen glc +; GCN-NEXT: BB0_2: +; GCN-NEXT: s_endpgm +.entry: + call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) { +; IR-LABEL: @atomic_add_and_format( +; IR-NEXT: .entry: +; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) +; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]]) +; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0 +; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] +; IR: 9: +; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0) +; IR-NEXT: br label [[TMP11]] +; IR: 11: +; IR-NEXT: [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ] +; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]]) +; IR-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP5]] +; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0) +; IR-NEXT: ret void +; +; GCN-LABEL: atomic_add_and_format: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 +; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s7, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: ; implicit-def: $vgpr1 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_cbranch_execz BB1_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: buffer_atomic_add v1, v2, s[0:3], 0 idxen glc +; GCN-NEXT: BB1_2: +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readfirstlane_b32 s4, v1 +; GCN-NEXT: v_add_i32_e32 v4, vcc, s4, v0 +; GCN-NEXT: s_waitcnt expcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen +; GCN-NEXT: s_endpgm +.entry: + %a = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> %arg, <4 x i32> %arg, i32 %a, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @atomic_sub(<4 x i32> inreg %arg) { +; IR-LABEL: @atomic_sub( +; IR-NEXT: .entry: +; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) +; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]]) +; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0 +; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] +; IR: 9: +; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0) +; IR-NEXT: br label [[TMP11]] +; IR: 11: +; IR-NEXT: ret void +; +; GCN-LABEL: atomic_sub: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0 +; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_cbranch_execz BB2_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: buffer_atomic_sub v0, v1, s[0:3], 0 idxen glc +; GCN-NEXT: BB2_2: +; GCN-NEXT: s_endpgm +.entry: + call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) { +; IR-LABEL: @atomic_sub_and_format( +; IR-NEXT: .entry: +; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) +; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]]) +; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0 +; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] +; IR: 9: +; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0) +; IR-NEXT: br label [[TMP11]] +; IR: 11: +; IR-NEXT: [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ] +; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]]) +; IR-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], [[TMP5]] +; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0) +; IR-NEXT: ret void +; +; GCN-LABEL: atomic_sub_and_format: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 +; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s7, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: ; implicit-def: $vgpr1 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_cbranch_execz BB3_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc +; GCN-NEXT: BB3_2: +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readfirstlane_b32 s4, v1 +; GCN-NEXT: v_sub_i32_e32 v4, vcc, s4, v0 +; GCN-NEXT: s_waitcnt expcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen +; GCN-NEXT: s_endpgm +.entry: + %a = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> %arg, <4 x i32> %arg, i32 %a, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @atomic_xor(<4 x i32> inreg %arg) { +; IR-LABEL: @atomic_xor( +; IR-NEXT: .entry: +; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) +; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]]) +; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; IR-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 1 +; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], 0 +; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] +; IR: 10: +; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0) +; IR-NEXT: br label [[TMP12]] +; IR: 12: +; IR-NEXT: ret void +; +; GCN-LABEL: atomic_xor: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0 +; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_cbranch_execz BB4_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GCN-NEXT: s_and_b32 s4, s4, 1 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc +; GCN-NEXT: BB4_2: +; GCN-NEXT: s_endpgm +.entry: + call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) { +; IR-LABEL: @atomic_xor_and_format( +; IR-NEXT: .entry: +; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) +; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]]) +; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]]) +; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; IR-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 1 +; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], 0 +; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] +; IR: 10: +; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0) +; IR-NEXT: br label [[TMP12]] +; IR: 12: +; IR-NEXT: [[TMP13:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ] +; IR-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP13]]) +; IR-NEXT: [[TMP15:%.*]] = and i32 [[TMP5]], 1 +; IR-NEXT: [[TMP16:%.*]] = xor i32 [[TMP14]], [[TMP15]] +; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP16]], i32 0, i32 0, i32 0) +; IR-NEXT: ret void +; +; GCN-LABEL: atomic_xor_and_format: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 +; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s7, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GCN-NEXT: ; implicit-def: $vgpr1 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_cbranch_execz BB5_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7] +; GCN-NEXT: s_and_b32 s6, s6, 1 +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc +; GCN-NEXT: BB5_2: +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readfirstlane_b32 s4, v1 +; GCN-NEXT: v_and_b32_e32 v0, 1, v0 +; GCN-NEXT: v_xor_b32_e32 v4, s4, v0 +; GCN-NEXT: s_waitcnt expcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen +; GCN-NEXT: s_endpgm +.entry: + %a = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> %arg, <4 x i32> %arg, i32 %a, i32 0, i32 0, i32 0) + ret void +} From cdac4492b4a523a888a013d42ea0a968f684ed59 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 28 Sep 2020 09:30:58 +0100 Subject: [PATCH 134/544] [SplitKit] Cope with no live subranges in defFromParent Following on from D87757 "[SplitKit] Only copy live lanes", it is possible to split a live range at a point when none of its subranges are live. This patch handles that case by inserting an implicit def of the superreg. Patch by Quentin Colombet! Differential Revision: https://reviews.llvm.org/D88397 --- llvm/lib/CodeGen/SplitKit.cpp | 12 ++++-- .../AMDGPU/splitkit-nolivesubranges.mir | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index f62aa8de5acc2..9e7d6d231e624 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -663,13 +663,19 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, if (S.liveAt(UseIdx)) LaneMask |= S.LaneMask; } - assert(LaneMask.any() && "Interval has no live subranges"); } else { LaneMask = LaneBitmask::getAll(); } - ++NumCopies; - Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx); + if (LaneMask.none()) { + const MCInstrDesc &Desc = TII.get(TargetOpcode::IMPLICIT_DEF); + MachineInstr *ImplicitDef = BuildMI(MBB, I, DebugLoc(), Desc, Reg); + SlotIndexes &Indexes = *LIS.getSlotIndexes(); + Def = Indexes.insertMachineInstrInMaps(*ImplicitDef, Late).getRegSlot(); + } else { + ++NumCopies; + Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx); + } } // Define the value in Reg. diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir new file mode 100644 index 0000000000000..0827c6a387bb7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir @@ -0,0 +1,42 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=greedy,virtregrewriter -verify-regalloc %s -o - | FileCheck %s + +# This test aims to trigger live-range splitting at a place where %0 subranges +# are all dead, but the main live-range of %0 is still alive. %0 main range is +# kept alive simply by not using undef. Then, the splitting is triggered by +# creating two points of high register pressure: +# - One where %0 main range is the only live live-range: this will force the +# insertion of a split for %0 main range. +# - One where %0.subrange is live, so that the interference check in regalloc +# triggers the splitting heuristic. +--- +name: func0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: func0 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: renamable $sgpr0 = IMPLICIT_DEF + ; CHECK: renamable $sgpr1 = IMPLICIT_DEF + ; CHECK: $sgpr104 = S_AND_B32 renamable $sgpr0, renamable $sgpr1, implicit-def $scc + ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 + ; CHECK: renamable $sgpr0_sgpr1 = IMPLICIT_DEF + ; CHECK: renamable $sgpr0 = IMPLICIT_DEF + ; CHECK: renamable $sgpr1 = IMPLICIT_DEF + ; CHECK: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $private_rsrc_reg, implicit $sp_reg :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 + ; CHECK: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $private_rsrc_reg, implicit $sp_reg :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: $sgpr105 = S_AND_B32 renamable $sgpr1, renamable $sgpr1, implicit-def $scc + ; CHECK: S_NOP 0, implicit $sgpr104, implicit $sgpr105 + %0:sreg_64 = COPY $sgpr0_sgpr1 + %0.sub0:sreg_64 = IMPLICIT_DEF + %0.sub1:sreg_64 = IMPLICIT_DEF + $sgpr104 = S_AND_B32 %0.sub0, %0.sub1, implicit-def $scc + KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 + %0.sub0:sreg_64 = IMPLICIT_DEF + %0.sub1:sreg_64 = IMPLICIT_DEF + KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 + $sgpr105 = S_AND_B32 %0.sub1, %0.sub1, implicit-def $scc + S_NOP 0, implicit $sgpr104, implicit $sgpr105 +... From 9f5da55f5d9299a76a4dfb67ef0324dbc1900826 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 24 Sep 2020 17:24:29 +0200 Subject: [PATCH 135/544] [SystemZ] Support bare nop instructions Add support of "nop" and "nopr" (without operands) to assembler. Review: Ulrich Weigand --- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 8 ++++++-- llvm/test/MC/SystemZ/insn-good.s | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index d5d56ecf6e47b..a2e207aedcde2 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -101,10 +101,14 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { } } -// NOPs. These are again variants of the conditional branches, -// with the condition mask set to "never". +// NOPs. These are again variants of the conditional branches, with the +// condition mask set to "never". NOP_bare can't be an InstAlias since it +// would need R0D hard coded which is not part of ADDR64BitRegClass. def NOP : InstAlias<"nop\t$XBD", (BCAsm 0, bdxaddr12only:$XBD), 0>; +let isAsmParserOnly = 1, hasNoSchedulingInfo = 1, M1 = 0, XBD2 = 0 in + def NOP_bare : InstRXb<0x47,(outs), (ins), "nop", []>; def NOPR : InstAlias<"nopr\t$R", (BCRAsm 0, GR64:$R), 0>; +def NOPR_bare : InstAlias<"nopr", (BCRAsm 0, R0D), 0>; // Fused compare-and-branch instructions. // diff --git a/llvm/test/MC/SystemZ/insn-good.s b/llvm/test/MC/SystemZ/insn-good.s index 07f721bfa5e49..f75db59211b39 100644 --- a/llvm/test/MC/SystemZ/insn-good.s +++ b/llvm/test/MC/SystemZ/insn-good.s @@ -12284,10 +12284,14 @@ niy 524287(%r15), 42 #CHECK: bc 0, 0 # encoding: [0x47,0x00,0x00,0x00] +#CHECK: nop # encoding: [0x47,0x00,0x00,0x00] #CHECK: bcr 0, %r7 # encoding: [0x07,0x07] +#CHECK: bcr 0, %r0 # encoding: [0x07,0x00] nop 0 + nop nopr %r7 + nopr #CHECK: nr %r0, %r0 # encoding: [0x14,0x00] #CHECK: nr %r0, %r15 # encoding: [0x14,0x0f] From 8c05c7c8d87c7ab02fca2a789dfcca4976c6601b Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Wed, 30 Sep 2020 12:05:17 +0300 Subject: [PATCH 136/544] [MLIR][SPIRV] Support different function control in (de)serialization Added support for different function control in serialization and deserialization. Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D88280 --- .../Dialect/SPIRV/Serialization/Deserializer.cpp | 14 ++++---------- .../lib/Dialect/SPIRV/Serialization/Serializer.cpp | 3 +-- mlir/test/Dialect/SPIRV/Serialization/module.mlir | 4 ++-- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp index eaa8f4d94833d..b5eea43338243 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp @@ -805,16 +805,10 @@ LogicalResult Deserializer::processFunction(ArrayRef operands) { return emitError(unknownLoc, "duplicate function definition/declaration"); } - auto functionControl = spirv::symbolizeFunctionControl(operands[2]); - if (!functionControl) { + auto fnControl = spirv::symbolizeFunctionControl(operands[2]); + if (!fnControl) { return emitError(unknownLoc, "unknown Function Control: ") << operands[2]; } - if (functionControl.getValue() != spirv::FunctionControl::None) { - /// TODO: Handle different function controls - return emitError(unknownLoc, "unhandled Function Control: '") - << spirv::stringifyFunctionControl(functionControl.getValue()) - << "'"; - } Type fnType = getType(operands[3]); if (!fnType || !fnType.isa()) { @@ -831,8 +825,8 @@ LogicalResult Deserializer::processFunction(ArrayRef operands) { } std::string fnName = getFunctionSymbol(operands[1]); - auto funcOp = - opBuilder.create(unknownLoc, fnName, functionType); + auto funcOp = opBuilder.create( + unknownLoc, fnName, functionType, fnControl.getValue()); curFunction = funcMap[operands[1]] = funcOp; LLVM_DEBUG(llvm::dbgs() << "-- start function " << fnName << " (type = " << fnType << ", id = " << operands[1] << ") --\n"); diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp index 887def35f791a..1eda166a03256 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp @@ -775,8 +775,7 @@ LogicalResult Serializer::processFuncOp(spirv::FuncOp op) { operands.push_back(resTypeID); auto funcID = getOrCreateFunctionID(op.getName()); operands.push_back(funcID); - // TODO: Support other function control options. - operands.push_back(static_cast(spirv::FunctionControl::None)); + operands.push_back(static_cast(op.function_control())); operands.push_back(fnTypeID); encodeInstructionInto(functionHeader, spirv::Opcode::OpFunction, operands); diff --git a/mlir/test/Dialect/SPIRV/Serialization/module.mlir b/mlir/test/Dialect/SPIRV/Serialization/module.mlir index 29973e9e4d773..2e8f635d7d3cb 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/module.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/module.mlir @@ -1,13 +1,13 @@ // RUN: mlir-translate -test-spirv-roundtrip -split-input-file %s | FileCheck %s // CHECK: spv.module Logical GLSL450 requires #spv.vce { -// CHECK-NEXT: spv.func @foo() "None" { +// CHECK-NEXT: spv.func @foo() "Inline" { // CHECK-NEXT: spv.Return // CHECK-NEXT: } // CHECK-NEXT: } spv.module Logical GLSL450 requires #spv.vce { - spv.func @foo() -> () "None" { + spv.func @foo() -> () "Inline" { spv.Return } } From 413577a8790407d75ba834fa5668c2632fe1851e Mon Sep 17 00:00:00 2001 From: Xiang1 Zhang Date: Wed, 30 Sep 2020 18:01:15 +0800 Subject: [PATCH 137/544] [X86] Support Intel Key Locker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Key Locker provides a mechanism to encrypt and decrypt data with an AES key without having access to the raw key value by converting AES keys into “handles”. These handles can be used to perform the same encryption and decryption operations as the original AES keys, but they only work on the current system and only until they are revoked. If software revokes Key Locker handles (e.g., on a reboot), then any previous handles can no longer be used. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D88398 --- clang/include/clang/Basic/BuiltinsX86.def | 19 + clang/include/clang/Driver/Options.td | 4 + clang/lib/Basic/Targets/X86.cpp | 12 + clang/lib/Basic/Targets/X86.h | 2 + clang/lib/CodeGen/CGBuiltin.cpp | 87 +++++ clang/lib/Headers/CMakeLists.txt | 2 + clang/lib/Headers/immintrin.h | 10 + clang/lib/Headers/keylocker_wide_intrin.h | 259 +++++++++++++ clang/lib/Headers/keylockerintrin.h | 343 ++++++++++++++++++ clang/test/CodeGen/X86/keylocker.c | 72 ++++ clang/test/CodeGen/attr-target-x86.c | 2 +- clang/test/Driver/x86-target-features.c | 10 + clang/test/Preprocessor/x86_target_features.c | 19 + llvm/include/llvm/IR/IntrinsicsX86.td | 53 +++ llvm/include/llvm/Support/X86TargetParser.def | 2 + llvm/lib/IR/Function.cpp | 4 +- llvm/lib/Support/Host.cpp | 5 + llvm/lib/Support/X86TargetParser.cpp | 6 +- llvm/lib/Target/X86/X86.td | 6 + llvm/lib/Target/X86/X86ISelLowering.cpp | 179 +++++++++ llvm/lib/Target/X86/X86InstrInfo.td | 5 + llvm/lib/Target/X86/X86InstrInfo.td.rej | 11 + llvm/lib/Target/X86/X86InstrKL.td | 66 ++++ llvm/lib/Target/X86/X86Subtarget.h | 8 + llvm/test/CodeGen/X86/keylocker-intrinsics.ll | 312 ++++++++++++++++ .../X86/KEYLOCKER/Keylocker-x86-32-att.txt | 276 ++++++++++++++ .../X86/KEYLOCKER/Keylocker-x86-32-intel.txt | 223 ++++++++++++ .../X86/KEYLOCKER/Keylocker-x86-64-att.txt | 277 ++++++++++++++ .../X86/KEYLOCKER/Keylocker-x86-64-intel.txt | 223 ++++++++++++ llvm/test/MC/X86/KEYLOCKER/keylocker-att.s | 205 +++++++++++ llvm/test/MC/X86/KEYLOCKER/keylocker-intel.s | 205 +++++++++++ .../MC/X86/KEYLOCKER/x86-64-keylocker-att.s | 205 +++++++++++ .../MC/X86/KEYLOCKER/x86-64-keylocker-intel.s | 205 +++++++++++ llvm/utils/TableGen/IntrinsicEmitter.cpp | 4 +- 34 files changed, 3317 insertions(+), 4 deletions(-) create mode 100644 clang/lib/Headers/keylocker_wide_intrin.h create mode 100644 clang/lib/Headers/keylockerintrin.h create mode 100644 clang/test/CodeGen/X86/keylocker.c create mode 100644 llvm/lib/Target/X86/X86InstrInfo.td.rej create mode 100644 llvm/lib/Target/X86/X86InstrKL.td create mode 100644 llvm/test/CodeGen/X86/keylocker-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-att.txt create mode 100644 llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-intel.txt create mode 100644 llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-att.txt create mode 100644 llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-intel.txt create mode 100644 llvm/test/MC/X86/KEYLOCKER/keylocker-att.s create mode 100644 llvm/test/MC/X86/KEYLOCKER/keylocker-intel.s create mode 100644 llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-att.s create mode 100644 llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-intel.s diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 35fb98352ec2b..e212d0a2a0cca 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1900,6 +1900,25 @@ TARGET_BUILTIN(__builtin_ia32_invpcid, "vUiv*", "nc", "invpcid") TARGET_BUILTIN(__builtin_ia32_enqcmd, "Ucv*vC*", "n", "enqcmd") TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") +// KEY LOCKER +TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vUiV2OiV2OiV2Oi", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey128, + "UiUiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey256, + "UiUiV2OiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesdec256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesencwide128kl, + "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesencwide256kl, + "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl, + "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl, + "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") + // SERIALIZE TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 09fdf50b1cb80..672a833c9d4da 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3253,6 +3253,10 @@ def minvpcid : Flag<["-"], "minvpcid">, Group; def mno_invpcid : Flag<["-"], "mno-invpcid">, Group; def mgfni : Flag<["-"], "mgfni">, Group; def mno_gfni : Flag<["-"], "mno-gfni">, Group; +def mkl : Flag<["-"], "mkl">, Group; +def mno_kl : Flag<["-"], "mno-kl">, Group; +def mwidekl : Flag<["-"], "mwidekl">, Group; +def mno_widekl : Flag<["-"], "mno-widekl">, Group; def mlwp : Flag<["-"], "mlwp">, Group; def mno_lwp : Flag<["-"], "mno-lwp">, Group; def mlzcnt : Flag<["-"], "mlzcnt">, Group; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b829dfac74fbf..5d89894c76283 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -276,6 +276,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasCLDEMOTE = true; } else if (Feature == "+rdpid") { HasRDPID = true; + } else if (Feature == "+kl") { + HasKL = true; + } else if (Feature == "+widekl") { + HasWIDEKL = true; } else if (Feature == "+retpoline-external-thunk") { HasRetpolineExternalThunk = true; } else if (Feature == "+sahf") { @@ -678,6 +682,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) Builder.defineMacro("__CLZERO__"); + if (HasKL) + Builder.defineMacro("__KL__"); + if (HasWIDEKL) + Builder.defineMacro("__WIDEKL__"); if (HasRDPID) Builder.defineMacro("__RDPID__"); if (HasCLDEMOTE) @@ -833,6 +841,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("fxsr", true) .Case("gfni", true) .Case("invpcid", true) + .Case("kl", true) + .Case("widekl", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) @@ -919,6 +929,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("fxsr", HasFXSR) .Case("gfni", HasGFNI) .Case("invpcid", HasINVPCID) + .Case("kl", HasKL) + .Case("widekl", HasWIDEKL) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 25dc9458c25a6..7b2b7dcf64604 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -127,6 +127,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasPTWRITE = false; bool HasINVPCID = false; bool HasENQCMD = false; + bool HasKL = false; // For key locker + bool HasWIDEKL = false; // For wide key locker bool HasAMXTILE = false; bool HasAMXINT8 = false; bool HasAMXBF16 = false; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 57804494d9a51..bb1c1d1aef338 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14037,6 +14037,93 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_psubusb128: case X86::BI__builtin_ia32_psubusw128: return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); + case X86::BI__builtin_ia32_encodekey128: + case X86::BI__builtin_ia32_encodekey256: + case X86::BI__builtin_ia32_aesenc128kl: + case X86::BI__builtin_ia32_aesdec128kl: + case X86::BI__builtin_ia32_aesenc256kl: + case X86::BI__builtin_ia32_aesdec256kl: + case X86::BI__builtin_ia32_aesencwide128kl: + case X86::BI__builtin_ia32_aesdecwide128kl: + case X86::BI__builtin_ia32_aesencwide256kl: + case X86::BI__builtin_ia32_aesdecwide256kl: { + int FirstReturnOp; + int ResultCount; + SmallVector InOps; + unsigned ID; + + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_encodekey128: + ID = Intrinsic::x86_encodekey128; + InOps = {Ops[0], Ops[1]}; + FirstReturnOp = 2; + ResultCount = 6; + break; + case X86::BI__builtin_ia32_encodekey256: + ID = Intrinsic::x86_encodekey256; + InOps = {Ops[0], Ops[1], Ops[2]}; + FirstReturnOp = 3; + ResultCount = 7; + break; + case X86::BI__builtin_ia32_aesenc128kl: + case X86::BI__builtin_ia32_aesdec128kl: + case X86::BI__builtin_ia32_aesenc256kl: + case X86::BI__builtin_ia32_aesdec256kl: { + InOps = {Ops[1], Ops[2]}; + FirstReturnOp = 0; + ResultCount = 1; + switch (BuiltinID) { + case X86::BI__builtin_ia32_aesenc128kl: + ID = Intrinsic::x86_aesenc128kl; + break; + case X86::BI__builtin_ia32_aesdec128kl: + ID = Intrinsic::x86_aesdec128kl; + break; + case X86::BI__builtin_ia32_aesenc256kl: + ID = Intrinsic::x86_aesenc256kl; + break; + case X86::BI__builtin_ia32_aesdec256kl: + ID = Intrinsic::x86_aesdec256kl; + break; + } + break; + } + case X86::BI__builtin_ia32_aesencwide128kl: + case X86::BI__builtin_ia32_aesdecwide128kl: + case X86::BI__builtin_ia32_aesencwide256kl: + case X86::BI__builtin_ia32_aesdecwide256kl: { + InOps = {Ops[0], Ops[9], Ops[10], Ops[11], Ops[12], Ops[13], + Ops[14], Ops[15], Ops[16]}; + FirstReturnOp = 1; + ResultCount = 8; + switch (BuiltinID) { + case X86::BI__builtin_ia32_aesencwide128kl: + ID = Intrinsic::x86_aesencwide128kl; + break; + case X86::BI__builtin_ia32_aesdecwide128kl: + ID = Intrinsic::x86_aesdecwide128kl; + break; + case X86::BI__builtin_ia32_aesencwide256kl: + ID = Intrinsic::x86_aesencwide256kl; + break; + case X86::BI__builtin_ia32_aesdecwide256kl: + ID = Intrinsic::x86_aesdecwide256kl; + break; + } + break; + } + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), InOps); + + for (int i = 0; i < ResultCount; ++i) { + Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, i + 1), + Ops[FirstReturnOp + i]); + } + + return Builder.CreateExtractValue(Call, 0); + } } } diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index a9761f0490675..8c12d5ab935d8 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -72,6 +72,8 @@ set(files inttypes.h invpcidintrin.h iso646.h + keylockerintrin.h + keylocker_wide_intrin.h limits.h lwpintrin.h lzcntintrin.h diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index e9dff2310fdf7..1beade1be2484 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -471,6 +471,16 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__KL__) +#include +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__WIDEKL__) +#include +#endif + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__) #include diff --git a/clang/lib/Headers/keylocker_wide_intrin.h b/clang/lib/Headers/keylocker_wide_intrin.h new file mode 100644 index 0000000000000..9b6c9ccab811f --- /dev/null +++ b/clang/lib/Headers/keylocker_wide_intrin.h @@ -0,0 +1,259 @@ +/*===-------------- keylocker_wide_intrin.h - KL_WIDE Intrinsics ------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef _KEYLOCKERINTRIN_WIDE_H +#define _KEYLOCKERINTRIN_WIDE_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ + __min_vector_width__(128))) + +/// Encrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENCWIDE128KL instructions. +/// +/// \operation +/// Handle := MEM[__h+383:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesencwide128kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENCWIDE256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesencwide256kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDECWIDE128KL instructions. +/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesdecwide128kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDECWIDE256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle = ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) +/// If (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesdecwide256kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif /* _KEYLOCKERINTRIN_WIDE_H */ diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h new file mode 100644 index 0000000000000..5bd4fe59c6be0 --- /dev/null +++ b/clang/lib/Headers/keylockerintrin.h @@ -0,0 +1,343 @@ +/*===----------------- keylockerintrin.h - KL Intrinsics -------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef _KEYLOCKERINTRIN_H +#define _KEYLOCKERINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ + __min_vector_width__(128))) + +/// Load internal wrapping key from __intkey, __enkey_lo and __enkey_hi. __ctl +/// will assigned to EAX, whch specifies the KeySource and whether backing up +/// the key is permitted. The 256-bit encryption key is loaded from the two +/// explicit operands (__enkey_lo and __enkey_hi). The 128-bit integrity key is +/// loaded from the implicit operand XMM0 which assigned by __intkey. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LOADIWKEY instructions. +/// +/// \operation +/// IF CPL > 0 // LOADKWKEY only allowed at ring 0 (supervisor mode) +/// GP (0) +/// FI +/// IF “LOADIWKEY exiting” VM execution control set +/// VMexit +/// FI +/// IF __ctl[4:1] > 1 // Reserved KeySource encoding used +/// GP (0) +/// FI +/// IF __ctl[31:5] != 0 // Reserved bit in __ctl is set +/// GP (0) +/// FI +/// IF __ctl[0] AND (CPUID.19H.ECX[0] == 0) // NoBackup is not supported on this part +/// GP (0) +/// FI +/// IF (__ctl[4:1] == 1) AND (CPUID.19H.ECX[1] == 0) // KeySource of 1 is not supported on this part +/// GP (0) +/// FI +/// IF (__ctl[4:1] == 0) // KeySource of 0. +/// IWKey.Encryption Key[127:0] := __enkey_hi[127:0]: +/// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] +/// IWKey.IntegrityKey[127:0] := __intkey[127:0] +/// IWKey.NoBackup := __ctl[0] +/// IWKey.KeySource := __ctl[4:1] +/// ZF := 0 +/// ELSE // KeySource of 1. See RDSEED definition for details of randomness +/// IF HW_NRND_GEN.ready == 1 // Full-entropy random data from RDSEED was received +/// IWKey.Encryption Key[127:0] := __enkey_hi[127:0] XOR HW_NRND_GEN.data[127:0] +/// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] XOR HW_NRND_GEN.data[255:128] +/// IWKey.Encryption Key[255:0] := __enkey_hi[127:0]:__enkey_lo[127:0] XOR HW_NRND_GEN.data[255:0] +/// IWKey.IntegrityKey[127:0] := __intkey[127:0] XOR HW_NRND_GEN.data[383:256] +/// IWKey.NoBackup := __ctl[0] +/// IWKey.KeySource := __ctl[4:1] +/// ZF := 0 +/// ELSE // Random data was not returned from RDSEED. IWKey was not loaded +/// ZF := 1 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_mm_loadiwkey (unsigned int __ctl, __m128i __intkey, + __m128i __enkey_lo, __m128i __enkey_hi) { + __builtin_ia32_loadiwkey (__ctl, __intkey, __enkey_lo, __enkey_hi); +} + +/// Wrap a 128-bit AES key from __key into a key handle and output in +/// ((__m128i*)__h) to ((__m128i*)__h) + 5 and a 32-bit value as return. +/// The explicit source operand __htype specifies handle restrictions. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the ENCODEKEY128 instructions. +/// +/// \operation +/// InputKey[127:0] := __key[127:0] +/// KeyMetadata[2:0] := __htype[2:0] +/// KeyMetadata[23:3] := 0 // Reserved for future usage +/// KeyMetadata[27:24] := 0 // KeyType is AES-128 (value of 0) +/// KeyMetadata[127:28] := 0 // Reserved for future usage +/// Handle[383:0] := WrapKey128(InputKey[127:0], KeyMetadata[127:0], +/// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) +/// dst[0] := IWKey.NoBackup +/// dst[4:1] := IWKey.KeySource[3:0] +/// dst[31:5] := 0 +/// MEM[__h+127:__h] := Handle[127:0] // AAD +/// MEM[__h+255:__h+128] := Handle[255:128] // Integrity Tag +/// MEM[__h+383:__h+256] := Handle[383:256] // CipherText +/// MEM[__h+511:__h+384] := 0 // Reserved for future usage +/// MEM[__h+639:__h+512] := 0 // Reserved for future usage +/// MEM[__h+767:__h+640] := 0 // Reserved for future usage +/// OF := 0 +/// SF := 0 +/// ZF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { + __m128i *__results = (__m128i*)__h; + + return __builtin_ia32_encodekey128(__htype, __key, + __results, + __results + 1, + __results + 2, + __results + 3, + __results + 4, + __results + 5); +} + +/// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then +/// output handle in ((__m128i*)__h) to ((__m128i*)__h) + 6 and +/// a 32-bit value as return. +/// The explicit source operand __htype specifies handle restrictions. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the ENCODEKEY256 instructions. +/// +/// \operation +/// InputKey[127:0] := __key_lo[127:0] +/// InputKey[255:128] := __key_hi[255:128] +/// KeyMetadata[2:0] := __htype[2:0] +/// KeyMetadata[23:3] := 0 // Reserved for future usage +/// KeyMetadata[27:24] := 1 // KeyType is AES-256 (value of 1) +/// KeyMetadata[127:28] := 0 // Reserved for future usage +/// Handle[511:0] := WrapKey256(InputKey[255:0], KeyMetadata[127:0], +/// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) +/// dst[0] := IWKey.NoBackup +/// dst[4:1] := IWKey.KeySource[3:0] +/// dst[31:5] := 0 +/// MEM[__h+127:__h] := Handle[127:0] // AAD +/// MEM[__h+255:__h+128] := Handle[255:128] // Tag +/// MEM[__h+383:__h+256] := Handle[383:256] // CipherText[127:0] +/// MEM[__h+511:__h+384] := Handle[511:384] // CipherText[255:128] +/// MEM[__h+639:__h+512] := 0 // Reserved for future usage +/// MEM[__h+767:__h+640] := 0 // Reserved for future usage +/// MEM[__h+895:__h+768] := 0 Integrity// Reserved for future usage +/// OF := 0 +/// SF := 0 +/// ZF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, + void *__h) { + __m128i *__results = (__m128i*)__h; + + return __builtin_ia32_encodekey256(__htype, __key_lo, __key_hi, + __results, + __results + 1, + __results + 2, + __results + 3, + __results + 4, + __results + 5, + __results + 6); +} + +/// The AESENC128KL performs 10 rounds of AES to encrypt the __idata using +/// the 128-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENC128KL instructions. +/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[383:256] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesenc128kl(__odata, __idata, __h); +} + +/// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using +/// the 256-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENC256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] // Load is not guaranteed to be atomic. +/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesenc256kl(__odata, __idata, __h); +} + +/// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using +/// the 128-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDEC128KL instructions. +/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. +/// IllegalHandle := (HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[383:256] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesdec128kl(__odata, __idata, __h); +} + +/// The AESDEC256KL performs 10 rounds of AES to decrypt the __idata using +/// the 256-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDEC256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle := (HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[383:256] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesdec256kl(__odata, __idata, __h); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* _KEYLOCKERINTRIN_H */ diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c new file mode 100644 index 0000000000000..835bdd279ef1f --- /dev/null +++ b/clang/test/CodeGen/X86/keylocker.c @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +void test_loadiwkey(unsigned int ctl, __m128i intkey, __m128i enkey_lo, __m128i enkey_hi) { + //CHECK-LABEL: @test_loadiwkey + //CHECK: @llvm.x86.loadiwkey + _mm_loadiwkey(ctl, intkey, enkey_lo, enkey_hi); +} + +unsigned int test_encodekey128_u32(unsigned int htype, __m128i key, void *h) { + //CHECK-LABEL: @test_encodekey128_u32 + //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %{{.*}}, <2 x i64> %{{.*}}) + return _mm_encodekey128_u32(htype, key, h); +} + +unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i key_hi, void *h) { + //CHECK-LABEL: @test_encodekey256_u32 + //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + return _mm_encodekey256_u32(htype, key_lo, key_hi, h); +} + +unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) { + //CHECK-LABEL: @test_mm_aesenc256kl_u8 + //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + return _mm_aesenc256kl_u8(odata, idata, h); +} + +unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) { + //CHECK-LABEL: @test_mm_aesdec256kl_u8 + //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + return _mm_aesdec256kl_u8(odata, idata, h); +} + +unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) { + //CHECK-LABEL: @test_mm_aesenc128kl_u8 + //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + return _mm_aesenc128kl_u8(odata, idata, h); +} + +unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) { + //CHECK-LABEL: @test_mm_aesdec128kl_u8 + //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + return _mm_aesdec128kl_u8(odata, idata, h); +} + +unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { + //CHECK-LABEL: @test__mm_aesencwide128kl + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + return _mm_aesencwide128kl_u8(odata, idata, h); +} + +unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { + //CHECK-LABEL: @test__mm_aesdecwide128kl + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + return _mm_aesdecwide128kl_u8(odata, idata, h); +} + +unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { + //CHECK-LABEL: @test__mm_aesencwide256kl + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + return _mm_aesencwide256kl_u8(odata, idata, h); +} + +unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { + //CHECK-LABEL: @test__mm_aesdecwide256kl + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + return _mm_aesdecwide256kl_u8(odata, idata, h); +} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index 304e5b78d3466..738b65b111310 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -50,7 +50,7 @@ int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; } // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 85a9374ab9057..9de728c19c7ca 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -254,6 +254,16 @@ // TSXLDTRK: "-target-feature" "+tsxldtrk" // NO-TSXLDTRK: "-target-feature" "-tsxldtrk" +// RUN: %clang -target i386-linux-gnu -mkl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=KL %s +// RUN: %clang -target i386-linux-gnu -mno-kl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-KL %s +// KL: "-target-feature" "+kl" +// NO-KL: "-target-feature" "-kl" + +// RUN: %clang -target i386-linux-gnu -mwidekl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=WIDE_KL %s +// RUN: %clang -target i386-linux-gnu -mno-widekl %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-WIDE_KL %s +// WIDE_KL: "-target-feature" "+widekl" +// NO-WIDE_KL: "-target-feature" "-widekl" + // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-tile %s -### -o %t.o 2>&1 | FileCheck --check-prefix=AMX-TILE %s // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-tile %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AMX-TILE %s // AMX-TILE: "-target-feature" "+amx-tile" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 4a46a131afa74..59bc9d6ab531e 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -486,6 +486,25 @@ // NOVP2INTERSECT-NOT: #define __AVX512VP2INTERSECT__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mkl -x c -E -dM -o - %s | FileCheck -check-prefix=KEYLOCKER %s +// KEYLOCKER: #define __KL__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-kl -x c -E -dM -o - %s | FileCheck -check-prefix=NOKEYLOCKER %s +// NOKEYLOCKER-NOT: #define __KL__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mwidekl -x c -E -dM -o - %s | FileCheck -check-prefix=KEYLOCKERW %s +// KEYLOCKERW: #define __KL__ 1 +// KEYLOCKERW: #define __WIDEKL__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-widekl -x c -E -dM -o - %s | FileCheck -check-prefix=NOKEYLOCKERW %s +// NOKEYLOCKERW-NOT: #define __KL__ 1 +// NOKEYLOCKERW-NOT: #define __WIDEKL__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mwidekl -mno-kl -x c -E -dM -o - %s | FileCheck -check-prefix=NOKEYLOCKERW2 %s +// NOKEYLOCKERW2-NOT: #define __KL__ 1 +// NOKEYLOCKERW2-NOT: #define __WIDEKL__ 1 + // RUN: %clang -target i386-unknown-unknown -march=atom -menqcmd -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=ENQCMD %s // ENQCMD: #define __ENQCMD__ 1 diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 51ecb97885643..5708a761919f5 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4948,6 +4948,59 @@ let TargetPrefix = "x86" in { def int_x86_xresldtrk : GCCBuiltin<"__builtin_ia32_xresldtrk">, Intrinsic<[], [], []>; } + +//===----------------------------------------------------------------------===// +// Key Locker +let TargetPrefix = "x86" in { + def int_x86_loadiwkey : GCCBuiltin<"__builtin_ia32_loadiwkey">, + Intrinsic<[], [llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + []>; + def int_x86_encodekey128 : + Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_i32_ty, llvm_v2i64_ty], []>; + def int_x86_encodekey256 : + Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesenc128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesdec128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesenc256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesdec256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesencwide128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesdecwide128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesencwide256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesdecwide256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; +} + //===----------------------------------------------------------------------===// // AMX - Intel AMX extensions diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index e3998c99a50a6..2a803ca7a6891 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -154,6 +154,8 @@ X86_FEATURE (F16C, "f16c") X86_FEATURE (FSGSBASE, "fsgsbase") X86_FEATURE (FXSR, "fxsr") X86_FEATURE (INVPCID, "invpcid") +X86_FEATURE (KL, "kl") +X86_FEATURE (WIDEKL, "widekl") X86_FEATURE (LWP, "lwp") X86_FEATURE (LZCNT, "lzcnt") X86_FEATURE (MOVBE, "movbe") diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 16cf1bd8a117d..8d741c3125a84 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -833,7 +833,8 @@ enum IIT_Info { IIT_SUBDIVIDE4_ARG = 45, IIT_VEC_OF_BITCASTS_TO_INT = 46, IIT_V128 = 47, - IIT_BF16 = 48 + IIT_BF16 = 48, + IIT_STRUCT9 = 49 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -995,6 +996,7 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_EMPTYSTRUCT: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0)); return; + case IIT_STRUCT9: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT8: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT7: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT6: ++StructElts; LLVM_FALLTHROUGH; diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 26534580d02d3..0f674bbcdc1bb 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1469,6 +1469,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); + Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); @@ -1509,6 +1510,10 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); + bool HasLeaf19 = + MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); + Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); + return true; } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index b7d9bd4f865c9..99836b8460def 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -194,7 +194,7 @@ static constexpr FeatureBitset FeaturesICLServer = FeaturesICLClient | FeaturePCONFIG | FeatureWBNOINVD; static constexpr FeatureBitset FeaturesTigerlake = FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B | - FeatureMOVDIRI | FeatureSHSTK; + FeatureMOVDIRI | FeatureSHSTK | FeatureKL | FeatureWIDEKL; static constexpr FeatureBitset FeaturesSapphireRapids = FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 | FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE | FeatureENQCMD | @@ -538,6 +538,10 @@ static constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {}; static constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE; static constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE; +// Key Locker Features +static constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2; +static constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL; + static constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = { #define X86_FEATURE(ENUM, STR) {{STR}, ImpliedFeatures##ENUM}, #include "llvm/Support/X86TargetParser.def" diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index f2651d658d71c..e5d47a0ac3255 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -279,6 +279,12 @@ def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true", "Wait and pause enhancements">; def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true", "Has ENQCMD instructions">; +def FeatureKL : SubtargetFeature<"kl", "HasKL", "true", + "Support Key Locker kl Instructions", + [FeatureSSE2]>; +def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true", + "Support Key Locker wide Instructions", + [FeatureKL]>; def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true", "Has serialize instruction">; def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true", diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4b3adc7dcfbc9..d0fd1046fdeb7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25966,6 +25966,185 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, Op->getOperand(3), Op->getOperand(4)}); return Chain; } + case Intrinsic::x86_encodekey128: + case Intrinsic::x86_encodekey256: { + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue); + SDValue Chain = Op.getOperand(0); + bool IsEK256 = false; + Chain = DAG.getCopyToReg(Chain, DL, X86::XMM0, Op->getOperand(3), + SDValue()); + + unsigned Opcode; + + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_encodekey128: + Opcode = X86::ENCODEKEY128; + break; + case Intrinsic::x86_encodekey256: + Opcode = X86::ENCODEKEY256; + Chain = DAG.getCopyToReg(Chain, DL, X86::XMM1, Op->getOperand(4), + Chain.getValue(1)); + IsEK256 = true; + break; + } + + SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, + {Op.getOperand(2), Chain, + Chain.getValue(1)}); + + Chain = SDValue(Res, 1); + + SDValue XMM0 = DAG.getCopyFromReg(Chain, DL, X86::XMM0, MVT::v16i8, + SDValue(Res, 2)); + SDValue XMM1 = DAG.getCopyFromReg(XMM0.getValue(1), DL, X86::XMM1, + MVT::v16i8, XMM0.getValue(2)); + SDValue XMM2 = DAG.getCopyFromReg(XMM1.getValue(1), DL, X86::XMM2, + MVT::v16i8, XMM1.getValue(2)); + SDValue XMM3, XMM4; + if (IsEK256) { + XMM3 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM3, + MVT::v16i8, XMM2.getValue(2)); + XMM4 = DAG.getCopyFromReg(XMM3.getValue(1), DL, X86::XMM4, + MVT::v16i8, XMM3.getValue(2)); + } else { + XMM4 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM4, + MVT::v16i8, XMM2.getValue(2)); + } + SDValue XMM5 = DAG.getCopyFromReg(XMM4.getValue(1), DL, X86::XMM5, + MVT::v16i8, XMM4.getValue(2)); + SDValue XMM6 = DAG.getCopyFromReg(XMM5.getValue(1), DL, X86::XMM6, + MVT::v16i8, XMM5.getValue(2)); + + if (IsEK256) { + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), + {SDValue(Res, 0), + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, Chain}); + } else { + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), + {SDValue(Res, 0), + XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, Chain}); + } + } + case Intrinsic::x86_aesenc128kl: + case Intrinsic::x86_aesdec128kl: + case Intrinsic::x86_aesenc256kl: + case Intrinsic::x86_aesdec256kl: { + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(MVT::v16i8, MVT::Other, MVT::Glue); + SDValue Chain = Op.getOperand(0); + unsigned Opcode; + + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_aesenc128kl: + Opcode = X86::AESENC128KL; + break; + case Intrinsic::x86_aesdec128kl: + Opcode = X86::AESDEC128KL; + break; + case Intrinsic::x86_aesenc256kl: + Opcode = X86::AESENC256KL; + break; + case Intrinsic::x86_aesdec256kl: + Opcode = X86::AESDEC256KL; + break; + } + + SDValue XMM = Op.getOperand(2); + SDValue Base = Op.getOperand(3); + SDValue Index = DAG.getRegister(0, MVT::i32); + SDValue Scale = DAG.getTargetConstant(1, DL, MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, DL, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + + SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, {XMM, Base, Scale, Index, + Disp, Segment, Chain}); + Chain = SDValue(Res, 1); + SDValue EFLAGS = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, + SDValue(Res, 2)); + SDValue ZF = getSETCC(X86::COND_E, EFLAGS.getValue(0), DL, DAG); + + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), + {ZF, SDValue(Res, 0), EFLAGS.getValue(1)}); + } + case Intrinsic::x86_aesencwide128kl: + case Intrinsic::x86_aesdecwide128kl: + case Intrinsic::x86_aesencwide256kl: + case Intrinsic::x86_aesdecwide256kl: { + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = Op.getOperand(0); + unsigned Opcode; + + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_aesencwide128kl: + Opcode = X86::AESENCWIDE128KL; + break; + case Intrinsic::x86_aesdecwide128kl: + Opcode = X86::AESDECWIDE128KL; + break; + case Intrinsic::x86_aesencwide256kl: + Opcode = X86::AESENCWIDE256KL; + break; + case Intrinsic::x86_aesdecwide256kl: + Opcode = X86::AESDECWIDE256KL; + break; + } + + SDValue Base = Op.getOperand(2); + SDValue Index = DAG.getRegister(0, MVT::i32); + SDValue Scale = DAG.getTargetConstant(1, DL, MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, DL, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + + Chain = DAG.getCopyToReg(Chain, DL, X86::XMM0, Op->getOperand(3), + SDValue()); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM1, + Op->getOperand(4), Chain.getValue(1)); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM2, + Op->getOperand(5), Chain.getValue(1)); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM3, + Op->getOperand(6), Chain.getValue(1)); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM4, + Op->getOperand(7), Chain.getValue(1)); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM5, + Op->getOperand(8), Chain.getValue(1)); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM6, + Op->getOperand(9), Chain.getValue(1)); + Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM7, + Op->getOperand(10),Chain.getValue(1)); + + SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, {Base, Scale, Index, + Disp, Segment, Chain, + Chain.getValue(1)}); + + Chain = SDValue(Res, 0); + SDValue EFLAGS = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, + SDValue(Res, 1)); + SDValue ZF = getSETCC(X86::COND_E, EFLAGS.getValue(0), DL, DAG); + SDValue XMM0 = DAG.getCopyFromReg(EFLAGS.getValue(1), DL, X86::XMM0, + MVT::v16i8, EFLAGS.getValue(2)); + SDValue XMM1 = DAG.getCopyFromReg(XMM0.getValue(1), DL, X86::XMM1, + MVT::v16i8, XMM0.getValue(2)); + SDValue XMM2 = DAG.getCopyFromReg(XMM1.getValue(1), DL, X86::XMM2, + MVT::v16i8, XMM1.getValue(2)); + SDValue XMM3 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM3, + MVT::v16i8, XMM2.getValue(2)); + SDValue XMM4 = DAG.getCopyFromReg(XMM3.getValue(1), DL, X86::XMM4, + MVT::v16i8, XMM3.getValue(2)); + SDValue XMM5 = DAG.getCopyFromReg(XMM4.getValue(1), DL, X86::XMM5, + MVT::v16i8, XMM4.getValue(2)); + SDValue XMM6 = DAG.getCopyFromReg(XMM5.getValue(1), DL, X86::XMM6, + MVT::v16i8, XMM5.getValue(2)); + SDValue XMM7 = DAG.getCopyFromReg(XMM6.getValue(1), DL, X86::XMM7, + MVT::v16i8, XMM6.getValue(2)); + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), + {ZF, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM7.getValue(1)}); + } } return SDValue(); } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 99a9ce2fc7e61..d13ba5dbc0eb0 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -971,6 +971,8 @@ def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; +def HasKL : Predicate<"Subtarget->hasKL()">; +def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">; def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">; def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">; def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">; @@ -3094,6 +3096,9 @@ include "X86InstrSGX.td" include "X86InstrTDX.td" +// Key Locker instructions +include "X86InstrKL.td" + // AMX instructions include "X86InstrAMX.td" diff --git a/llvm/lib/Target/X86/X86InstrInfo.td.rej b/llvm/lib/Target/X86/X86InstrInfo.td.rej new file mode 100644 index 0000000000000..5c0a632b55a70 --- /dev/null +++ b/llvm/lib/Target/X86/X86InstrInfo.td.rej @@ -0,0 +1,11 @@ +diff a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td (rejected hunks) +@@ -3092,6 +3094,9 @@ include "X86InstrSVM.td" + include "X86InstrTSX.td" + include "X86InstrSGX.td" + ++// Key Locker instructions ++include "X86InstrKL.td" ++ + // AMX instructions + include "X86InstrAMX.td" + diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td new file mode 100644 index 0000000000000..452410891bd86 --- /dev/null +++ b/llvm/lib/Target/X86/X86InstrKL.td @@ -0,0 +1,66 @@ +//===---------------------------*-tablegen-*-------------------------------===// +//===------------- X86InstrKL.td - KL Instruction Set Extension -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions that make up the Intel key locker +// instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Key Locker instructions + +let SchedRW = [WriteSystem], Predicates = [HasKL] in { + let Uses = [XMM0, EAX] in { + def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128X:$src1, VR128X:$src2), + "loadiwkey\t{$src2, $src1|$src1, $src2}", + [(int_x86_loadiwkey EAX, XMM0, VR128X:$src1, VR128X:$src2)]>, T8XS; + } + + let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6] in { + def ENCODEKEY128 : I<0xFA, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS; + } + + let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6] in { + def ENCODEKEY256 : I<0xFB, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS; + } + + let Constraints = "$src1 = $dst", + Defs = [EFLAGS] in { + def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + "aesenc128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + + def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + "aesdec128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + + def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + "aesenc256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + + def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + "aesdec256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + } + +} // SchedRW, Predicates + +let SchedRW = [WriteSystem], Predicates = [HasWIDEKL] in { + let Uses = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7], + Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in { + def AESENCWIDE128KL : I<0xD8, MRM0m, (outs), (ins opaquemem:$src), + "aesencwide128kl\t$src", []>, T8XS; + def AESDECWIDE128KL : I<0xD8, MRM1m, (outs), (ins opaquemem:$src), + "aesdecwide128kl\t$src", []>, T8XS; + def AESENCWIDE256KL : I<0xD8, MRM2m, (outs), (ins opaquemem:$src), + "aesencwide256kl\t$src", []>, T8XS; + def AESDECWIDE256KL : I<0xD8, MRM3m, (outs), (ins opaquemem:$src), + "aesdecwide256kl\t$src", []>, T8XS; + } + +} // SchedRW, Predicates diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 923f8105870fc..263be40639db8 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -395,6 +395,12 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Processor supports PCONFIG instruction bool HasPCONFIG = false; + /// Processor support key locker instructions + bool HasKL = false; + + /// Processor support key locker wide instructions + bool HasWIDEKL = false; + /// Processor supports SERIALIZE instruction bool HasSERIALIZE = false; @@ -728,6 +734,8 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasSGX() const { return HasSGX; } bool hasINVPCID() const { return HasINVPCID; } bool hasENQCMD() const { return HasENQCMD; } + bool hasKL() const { return HasKL; } + bool hasWIDEKL() const { return HasWIDEKL; } bool hasSERIALIZE() const { return HasSERIALIZE; } bool hasTSXLDTRK() const { return HasTSXLDTRK; } bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; } diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll new file mode 100644 index 0000000000000..472eed484a16e --- /dev/null +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll @@ -0,0 +1,312 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unkown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unkown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unkown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unkown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 + +declare void @llvm.x86.loadiwkey(i32, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) +declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) + +define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) { +; X64-LABEL: test_loadiwkey: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: loadiwkey %xmm2, %xmm1 +; X64-NEXT: retq +; +; X32-LABEL: test_loadiwkey: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: loadiwkey %xmm2, %xmm1 +; X32-NEXT: retl +entry: + tail call void @llvm.x86.loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) + ret void +} + +define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) { +; X64-LABEL: test_encodekey128_u32: +; X64: # %bb.0: # %entry +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: encodekey128 %edi, %eax +; X64-NEXT: movaps %xmm0, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm2, (%rcx) +; X64-NEXT: movaps %xmm4, (%r8) +; X64-NEXT: movaps %xmm5, (%r9) +; X64-NEXT: movaps %xmm6, (%r10) +; X64-NEXT: retq +; +; X32-LABEL: test_encodekey128_u32: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: pushl %edi +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: .cfi_offset %esi, -20 +; X32-NEXT: .cfi_offset %edi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: encodekey128 %eax, %eax +; X32-NEXT: vmovaps %xmm0, (%ebp) +; X32-NEXT: vmovaps %xmm1, (%ebx) +; X32-NEXT: vmovaps %xmm2, (%edi) +; X32-NEXT: vmovaps %xmm4, (%esi) +; X32-NEXT: vmovaps %xmm5, (%edx) +; X32-NEXT: vmovaps %xmm6, (%ecx) +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: popl %edi +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: popl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: retl +entry: + %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key) + %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %h0, align 16 + %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %h1, align 16 + %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %3, <2 x i64>* %h2, align 16 + %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %4, <2 x i64>* %h3, align 16 + %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %5, <2 x i64>* %h4, align 16 + %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %6, <2 x i64>* %h5, align 16 + %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %7 +} + +define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) { +; X64-LABEL: test_encodekey256_u32: +; X64: # %bb.0: # %entry +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: encodekey256 %edi, %eax +; X64-NEXT: movaps %xmm0, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm2, (%rcx) +; X64-NEXT: movaps %xmm3, (%r8) +; X64-NEXT: movaps %xmm4, (%r9) +; X64-NEXT: movaps %xmm5, (%r10) +; X64-NEXT: retq +; +; X32-LABEL: test_encodekey256_u32: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: pushl %edi +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: .cfi_offset %esi, -20 +; X32-NEXT: .cfi_offset %edi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: encodekey256 %eax, %eax +; X32-NEXT: vmovaps %xmm0, (%ebp) +; X32-NEXT: vmovaps %xmm1, (%ebx) +; X32-NEXT: vmovaps %xmm2, (%edi) +; X32-NEXT: vmovaps %xmm3, (%esi) +; X32-NEXT: vmovaps %xmm4, (%edx) +; X32-NEXT: vmovaps %xmm5, (%ecx) +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: popl %edi +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: popl %ebx +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: retl +entry: + %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi) + %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %h0, align 16 + %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %h1, align 16 + %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %3, <2 x i64>* %h2, align 16 + %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %4, <2 x i64>* %h3, align 16 + %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %5, <2 x i64>* %h4, align 16 + %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %6, <2 x i64>* %h5, align 16 + %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %7 +} + +define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h) { +; X64-LABEL: test_mm_aesenc128kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: aesenc128kl (%rdi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesenc128kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: aesenc128kl (%eax), %xmm0 +; X32-NEXT: sete %al +; X32-NEXT: retl +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, i8* %h) + %1 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %1 +} + +define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, i8* %h) { +; X64-LABEL: test_mm_aesdec128kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: aesdec128kl (%rdi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesdec128kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: aesdec128kl (%eax), %xmm0 +; X32-NEXT: sete %al +; X32-NEXT: retl +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, i8* %h) + %1 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %1 +} + +define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, i8* %h) { +; X64-LABEL: test_mm_aesenc256kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: aesenc256kl (%rdi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesenc256kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: aesenc256kl (%eax), %xmm0 +; X32-NEXT: sete %al +; X32-NEXT: retl +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, i8* %h) + %1 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %1 +} + +define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, i8* %h) { +; X64-LABEL: test_mm_aesdec256kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: aesdec256kl (%rdi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesdec256kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: aesdec256kl (%eax), %xmm0 +; X32-NEXT: sete %al +; X32-NEXT: retl +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, i8* %h) + %1 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %1 +} + +define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) { +; X64-LABEL: test_mm_aesencwide128kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: aesencwide128kl (%rdi) +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesencwide128kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: vmovaps 24(%ebp), %xmm3 +; X32-NEXT: vmovaps 40(%ebp), %xmm4 +; X32-NEXT: vmovaps 56(%ebp), %xmm5 +; X32-NEXT: vmovaps 72(%ebp), %xmm6 +; X32-NEXT: vmovaps 88(%ebp), %xmm7 +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: aesencwide128kl (%eax) +; X32-NEXT: sete %al +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: retl +entry: + %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %1 +} + +define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) { +; X64-LABEL: test_mm_aesencwide256kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: aesencwide256kl (%rdi) +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesencwide256kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: vmovaps 24(%ebp), %xmm3 +; X32-NEXT: vmovaps 40(%ebp), %xmm4 +; X32-NEXT: vmovaps 56(%ebp), %xmm5 +; X32-NEXT: vmovaps 72(%ebp), %xmm6 +; X32-NEXT: vmovaps 88(%ebp), %xmm7 +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: aesencwide256kl (%eax) +; X32-NEXT: sete %al +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: retl +entry: + %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %1 +} diff --git a/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-att.txt b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-att.txt new file mode 100644 index 0000000000000..45f2d1164faac --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-att.txt @@ -0,0 +1,276 @@ +# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s +# CHECK: loadiwkey %xmm2, %xmm3 +0xf3 0x0f 0x38 0xdc 0xda + +# CHECK: loadiwkey %xmm2, %xmm6 +0xf3 0x0f 0x38 0xdc 0xf2 + +# CHECK: encodekey128 %eax, %ebx +0xf3 0x0f 0x38 0xfa 0xd8 + +# CHECK: encodekey128 %eax, %edx +0xf3 0x0f 0x38 0xfa 0xd0 + +# CHECK: encodekey256 %eax, %ebx +0xf3 0x0f 0x38 0xfb 0xd8 + +# CHECK: encodekey256 %eax, %edx +0xf3 0x0f 0x38 0xfb 0xd0 + +# CHECK: aesenc128kl 126(%edx), %xmm2 +0xf3 0x0f 0x38 0xdc 0x52 0x7e + +# CHECK: aesdec128kl 126(%edx), %xmm2 +0xf3 0x0f 0x38 0xdd 0x52 0x7e + +# CHECK: aesenc256kl 126(%edx), %xmm2 +0xf3 0x0f 0x38 0xde 0x52 0x7e + +# CHECK: aesdec256kl 126(%edx), %xmm2 +0xf3 0x0f 0x38 0xdf 0x52 0x7e + +# CHECK: aesencwide128kl (%ebx) +0xf3 0x0f 0x38 0xd8 0x03 + +# CHECK: aesencwide128kl 126(%edx) +0xf3 0x0f 0x38 0xd8 0x42 0x7e + +# CHECK: aesdecwide128kl (%ebx) +0xf3 0x0f 0x38 0xd8 0x0b + +# CHECK: aesdecwide128kl 126(%edx) +0xf3 0x0f 0x38 0xd8 0x4a 0x7e + +# CHECK: aesencwide256kl (%ebx) +0xf3 0x0f 0x38 0xd8 0x13 + +# CHECK: aesencwide256kl 126(%edx) +0xf3 0x0f 0x38 0xd8 0x52 0x7e + +# CHECK: aesdecwide256kl (%ebx) +0xf3 0x0f 0x38 0xd8 0x1b + +# CHECK: aesdecwide256kl 126(%edx) +0xf3 0x0f 0x38 0xd8 0x5a 0x7e + +# CHECK: aesdec128kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xdd,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xdd,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xdd,0x10 + +# CHECK: aesdec128kl -1536(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xdd,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl 6096(%ecx), %xmm2 +0xf3,0x0f,0x38,0xdd,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl -6144(%edx), %xmm2 +0xf3,0x0f,0x38,0xdd,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xdf,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xdf,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xdf,0x10 + +# CHECK: aesdec256kl -2048(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xdf,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl 8128(%ecx), %xmm2 +0xf3,0x0f,0x38,0xdf,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl -8192(%edx), %xmm2 +0xf3,0x0f,0x38,0xdf,0x92,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xdc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xdc,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xdc,0x10 + +# CHECK: aesenc128kl -1536(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xdc,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl 6096(%ecx), %xmm2 +0xf3,0x0f,0x38,0xdc,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl -6144(%edx), %xmm2 +0xf3,0x0f,0x38,0xdc,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xde,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xde,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xde,0x10 + +# CHECK: aesenc256kl -2048(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xde,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl 8128(%ecx), %xmm2 +0xf3,0x0f,0x38,0xde,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl -8192(%edx), %xmm2 +0xf3,0x0f,0x38,0xde,0x92,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey %xmm3, %xmm2 +0xf3,0x0f,0x38,0xdc,0xd3 + +# CHECK: aesdec128kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xdd,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xdd,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xdd,0x10 + +# CHECK: aesdec128kl -1536(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xdd,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl 6096(%ecx), %xmm2 +0xf3,0x0f,0x38,0xdd,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl -6144(%edx), %xmm2 +0xf3,0x0f,0x38,0xdd,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xdf,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xdf,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xdf,0x10 + +# CHECK: aesdec256kl -2048(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xdf,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl 8128(%ecx), %xmm2 +0xf3,0x0f,0x38,0xdf,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl -8192(%edx), %xmm2 +0xf3,0x0f,0x38,0xdf,0x92,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xdc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xdc,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xdc,0x10 + +# CHECK: aesenc128kl -1536(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xdc,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl 6096(%ecx), %xmm2 +0xf3,0x0f,0x38,0xdc,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl -6144(%edx), %xmm2 +0xf3,0x0f,0x38,0xdc,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl 268435456(%esp,%esi,8), %xmm2 +0xf3,0x0f,0x38,0xde,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl 291(%edi,%eax,4), %xmm2 +0xf3,0x0f,0x38,0xde,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl (%eax), %xmm2 +0xf3,0x0f,0x38,0xde,0x10 + +# CHECK: aesenc256kl -2048(,%ebp,2), %xmm2 +0xf3,0x0f,0x38,0xde,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl 8128(%ecx), %xmm2 +0xf3,0x0f,0x38,0xde,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl -8192(%edx), %xmm2 +0xf3,0x0f,0x38,0xde,0x92,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey %xmm3, %xmm2 +0xf3,0x0f,0x38,0xdc,0xd3 + +# CHECK: aesdecwide128kl 268435456(%esp,%esi,8) +0xf3,0x0f,0x38,0xd8,0x8c,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide128kl 291(%edi,%eax,4) +0xf3,0x0f,0x38,0xd8,0x8c,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide128kl (%eax) +0xf3,0x0f,0x38,0xd8,0x08 + +# CHECK: aesdecwide128kl -1536(,%ebp,2) +0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdecwide128kl 6096(%ecx) +0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00 + +# CHECK: aesdecwide128kl -6144(%edx) +0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff + +# CHECK: aesdecwide256kl 268435456(%esp,%esi,8) +0xf3,0x0f,0x38,0xd8,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide256kl 291(%edi,%eax,4) +0xf3,0x0f,0x38,0xd8,0x9c,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide256kl (%eax) +0xf3,0x0f,0x38,0xd8,0x18 + +# CHECK: aesdecwide256kl -2048(,%ebp,2) +0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdecwide256kl 8128(%ecx) +0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdecwide256kl -8192(%edx) +0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff + +# CHECK: aesencwide128kl 268435456(%esp,%esi,8) +0xf3,0x0f,0x38,0xd8,0x84,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide128kl 291(%edi,%eax,4) +0xf3,0x0f,0x38,0xd8,0x84,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide128kl (%eax) +0xf3,0x0f,0x38,0xd8,0x00 + +# CHECK: aesencwide128kl -1536(,%ebp,2) +0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesencwide128kl 6096(%ecx) +0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00 + +# CHECK: aesencwide128kl -6144(%edx) +0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff + +# CHECK: aesencwide256kl 268435456(%esp,%esi,8) +0xf3,0x0f,0x38,0xd8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide256kl 291(%edi,%eax,4) +0xf3,0x0f,0x38,0xd8,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide256kl (%eax) +0xf3,0x0f,0x38,0xd8,0x10 + +# CHECK: aesencwide256kl -2048(,%ebp,2) +0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesencwide256kl 8128(%ecx) +0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesencwide256kl -8192(%edx) +0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff diff --git a/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-intel.txt b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-intel.txt new file mode 100644 index 0000000000000..983abeb780601 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-32-intel.txt @@ -0,0 +1,223 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: aesdec128kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xdd,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xdd,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl xmm2, [eax] +0xf3,0x0f,0x38,0xdd,0x10 + +# CHECK: aesdec128kl xmm2, [2*ebp - 1536] +0xf3,0x0f,0x38,0xdd,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl xmm2, [ecx + 6096] +0xf3,0x0f,0x38,0xdd,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl xmm2, [edx - 6144] +0xf3,0x0f,0x38,0xdd,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xdf,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xdf,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl xmm2, [eax] +0xf3,0x0f,0x38,0xdf,0x10 + +# CHECK: aesdec256kl xmm2, [2*ebp - 2048] +0xf3,0x0f,0x38,0xdf,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl xmm2, [ecx + 8128] +0xf3,0x0f,0x38,0xdf,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl xmm2, [edx - 8192] +0xf3,0x0f,0x38,0xdf,0x92,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xdc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xdc,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl xmm2, [eax] +0xf3,0x0f,0x38,0xdc,0x10 + +# CHECK: aesenc128kl xmm2, [2*ebp - 1536] +0xf3,0x0f,0x38,0xdc,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl xmm2, [ecx + 6096] +0xf3,0x0f,0x38,0xdc,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl xmm2, [edx - 6144] +0xf3,0x0f,0x38,0xdc,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xde,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xde,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl xmm2, [eax] +0xf3,0x0f,0x38,0xde,0x10 + +# CHECK: aesenc256kl xmm2, [2*ebp - 2048] +0xf3,0x0f,0x38,0xde,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl xmm2, [ecx + 8128] +0xf3,0x0f,0x38,0xde,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl xmm2, [edx - 8192] +0xf3,0x0f,0x38,0xde,0x92,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey xmm2, xmm3 +0xf3,0x0f,0x38,0xdc,0xd3 + +# CHECK: aesdec128kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xdd,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xdd,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl xmm2, [eax] +0xf3,0x0f,0x38,0xdd,0x10 + +# CHECK: aesdec128kl xmm2, [2*ebp - 1536] +0xf3,0x0f,0x38,0xdd,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl xmm2, [ecx + 6096] +0xf3,0x0f,0x38,0xdd,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl xmm2, [edx - 6144] +0xf3,0x0f,0x38,0xdd,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xdf,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xdf,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl xmm2, [eax] +0xf3,0x0f,0x38,0xdf,0x10 + +# CHECK: aesdec256kl xmm2, [2*ebp - 2048] +0xf3,0x0f,0x38,0xdf,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl xmm2, [ecx + 8128] +0xf3,0x0f,0x38,0xdf,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl xmm2, [edx - 8192] +0xf3,0x0f,0x38,0xdf,0x92,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xdc,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xdc,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl xmm2, [eax] +0xf3,0x0f,0x38,0xdc,0x10 + +# CHECK: aesenc128kl xmm2, [2*ebp - 1536] +0xf3,0x0f,0x38,0xdc,0x14,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl xmm2, [ecx + 6096] +0xf3,0x0f,0x38,0xdc,0x91,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl xmm2, [edx - 6144] +0xf3,0x0f,0x38,0xdc,0x92,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl xmm2, [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xde,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl xmm2, [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xde,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl xmm2, [eax] +0xf3,0x0f,0x38,0xde,0x10 + +# CHECK: aesenc256kl xmm2, [2*ebp - 2048] +0xf3,0x0f,0x38,0xde,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl xmm2, [ecx + 8128] +0xf3,0x0f,0x38,0xde,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl xmm2, [edx - 8192] +0xf3,0x0f,0x38,0xde,0x92,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey xmm2, xmm3 +0xf3,0x0f,0x38,0xdc,0xd3 + +# CHECK: aesdecwide128kl [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xd8,0x8c,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide128kl [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xd8,0x8c,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide128kl [eax] +0xf3,0x0f,0x38,0xd8,0x08 + +# CHECK: aesdecwide128kl [2*ebp - 1536] +0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdecwide128kl [ecx + 6096] +0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00 + +# CHECK: aesdecwide128kl [edx - 6144] +0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff + +# CHECK: aesdecwide256kl [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xd8,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide256kl [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xd8,0x9c,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide256kl [eax] +0xf3,0x0f,0x38,0xd8,0x18 + +# CHECK: aesdecwide256kl [2*ebp - 2048] +0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdecwide256kl [ecx + 8128] +0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdecwide256kl [edx - 8192] +0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff + +# CHECK: aesencwide128kl [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xd8,0x84,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide128kl [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xd8,0x84,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide128kl [eax] +0xf3,0x0f,0x38,0xd8,0x00 + +# CHECK: aesencwide128kl [2*ebp - 1536] +0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesencwide128kl [ecx + 6096] +0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00 + +# CHECK: aesencwide128kl [edx - 6144] +0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff + +# CHECK: aesencwide256kl [esp + 8*esi + 268435456] +0xf3,0x0f,0x38,0xd8,0x94,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide256kl [edi + 4*eax + 291] +0xf3,0x0f,0x38,0xd8,0x94,0x87,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide256kl [eax] +0xf3,0x0f,0x38,0xd8,0x10 + +# CHECK: aesencwide256kl [2*ebp - 2048] +0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesencwide256kl [ecx + 8128] +0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesencwide256kl [edx - 8192] +0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff diff --git a/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-att.txt b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-att.txt new file mode 100644 index 0000000000000..973677d92aa32 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-att.txt @@ -0,0 +1,277 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s +# CHECK: loadiwkey %xmm2, %xmm3 +0xf3 0x0f 0x38 0xdc 0xda + +# CHECK: loadiwkey %xmm2, %xmm6 +0xf3 0x0f 0x38 0xdc 0xf2 + +# CHECK: encodekey128 %eax, %ebx +0xf3 0x0f 0x38 0xfa 0xd8 + +# CHECK: encodekey128 %eax, %edx +0xf3 0x0f 0x38 0xfa 0xd0 + +# CHECK: encodekey256 %eax, %ebx +0xf3 0x0f 0x38 0xfb 0xd8 + +# CHECK: encodekey256 %eax, %edx +0xf3 0x0f 0x38 0xfb 0xd0 + +# CHECK: aesenc128kl 126(%rdx), %xmm2 +0xf3 0x0f 0x38 0xdc 0x52 0x7e + +# CHECK: aesdec128kl 126(%rdx), %xmm2 +0xf3 0x0f 0x38 0xdd 0x52 0x7e + +# CHECK: aesenc256kl 126(%rdx), %xmm2 +0xf3 0x0f 0x38 0xde 0x52 0x7e + +# CHECK: aesdec256kl 126(%rdx), %xmm2 +0xf3 0x0f 0x38 0xdf 0x52 0x7e + +# CHECK: aesencwide128kl (%rbx) +0xf3 0x0f 0x38 0xd8 0x03 + +# CHECK: aesencwide128kl 126(%rdx) +0xf3 0x0f 0x38 0xd8 0x42 0x7e + +# CHECK: aesdecwide128kl (%rbx) +0xf3 0x0f 0x38 0xd8 0x0b + +# CHECK: aesdecwide128kl 126(%rdx) +0xf3 0x0f 0x38 0xd8 0x4a 0x7e + +# CHECK: aesencwide256kl (%rbx) +0xf3 0x0f 0x38 0xd8 0x13 + +# CHECK: aesencwide256kl 126(%rdx) +0xf3 0x0f 0x38 0xd8 0x52 0x7e + +# CHECK: aesdecwide256kl (%rbx) +0xf3 0x0f 0x38 0xd8 0x1b + +# CHECK: aesdecwide256kl 126(%rdx) +0xf3 0x0f 0x38 0xd8 0x5a 0x7e + +# CHECK: aesdec128kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xdd,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xdd,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xdd,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec128kl -1536(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xdd,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl 6096(%rcx), %xmm6 +0xf3,0x0f,0x38,0xdd,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl -6144(%rdx), %xmm6 +0xf3,0x0f,0x38,0xdd,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xdf,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xdf,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xdf,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec256kl -2048(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xdf,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl 8128(%rcx), %xmm6 +0xf3,0x0f,0x38,0xdf,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl -8192(%rdx), %xmm6 +0xf3,0x0f,0x38,0xdf,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xdc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xdc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xdc,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc128kl -1536(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xdc,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl 6096(%rcx), %xmm6 +0xf3,0x0f,0x38,0xdc,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl -6144(%rdx), %xmm6 +0xf3,0x0f,0x38,0xdc,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xde,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xde,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xde,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc256kl -2048(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xde,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl 8128(%rcx), %xmm6 +0xf3,0x0f,0x38,0xde,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl -8192(%rdx), %xmm6 +0xf3,0x0f,0x38,0xde,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey %xmm7, %xmm6 +0xf3,0x0f,0x38,0xdc,0xf7 + +# CHECK: aesdec128kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xdd,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xdd,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xdd,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec128kl -1536(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xdd,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl 6096(%rcx), %xmm6 +0xf3,0x0f,0x38,0xdd,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl -6144(%rdx), %xmm6 +0xf3,0x0f,0x38,0xdd,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xdf,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xdf,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xdf,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec256kl -2048(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xdf,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl 8128(%rcx), %xmm6 +0xf3,0x0f,0x38,0xdf,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl -8192(%rdx), %xmm6 +0xf3,0x0f,0x38,0xdf,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xdc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xdc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xdc,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc128kl -1536(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xdc,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl 6096(%rcx), %xmm6 +0xf3,0x0f,0x38,0xdc,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl -6144(%rdx), %xmm6 +0xf3,0x0f,0x38,0xdc,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl 268435456(%rbp,%r14,8), %xmm6 +0xf3,0x42,0x0f,0x38,0xde,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl 291(%r8,%rax,4), %xmm6 +0xf3,0x41,0x0f,0x38,0xde,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl (%rip), %xmm6 +0xf3,0x0f,0x38,0xde,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc256kl -2048(,%rbp,2), %xmm6 +0xf3,0x0f,0x38,0xde,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl 8128(%rcx), %xmm6 +0xf3,0x0f,0x38,0xde,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl -8192(%rdx), %xmm6 +0xf3,0x0f,0x38,0xde,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey %xmm7, %xmm6 +0xf3,0x0f,0x38,0xdc,0xf7 + +# CHECK: aesdecwide128kl 268435456(%rbp,%r14,8) +0xf3,0x42,0x0f,0x38,0xd8,0x8c,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide128kl 291(%r8,%rax,4) +0xf3,0x41,0x0f,0x38,0xd8,0x8c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide128kl (%rip) +0xf3,0x0f,0x38,0xd8,0x0d,0x00,0x00,0x00,0x00 + +# CHECK: aesdecwide128kl -1536(,%rbp,2) +0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdecwide128kl 6096(%rcx) +0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00 + +# CHECK: aesdecwide128kl -6144(%rdx) +0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff + +# CHECK: aesdecwide256kl 268435456(%rbp,%r14,8) +0xf3,0x42,0x0f,0x38,0xd8,0x9c,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide256kl 291(%r8,%rax,4) +0xf3,0x41,0x0f,0x38,0xd8,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide256kl (%rip) +0xf3,0x0f,0x38,0xd8,0x1d,0x00,0x00,0x00,0x00 + +# CHECK: aesdecwide256kl -2048(,%rbp,2) +0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdecwide256kl 8128(%rcx) +0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdecwide256kl -8192(%rdx) +0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff + +# CHECK: aesencwide128kl 268435456(%rbp,%r14,8) +0xf3,0x42,0x0f,0x38,0xd8,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide128kl 291(%r8,%rax,4) +0xf3,0x41,0x0f,0x38,0xd8,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide128kl (%rip) +0xf3,0x0f,0x38,0xd8,0x05,0x00,0x00,0x00,0x00 + +# CHECK: aesencwide128kl -1536(,%rbp,2) +0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesencwide128kl 6096(%rcx) +0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00 + +# CHECK: aesencwide128kl -6144(%rdx) +0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff + +# CHECK: aesencwide256kl 268435456(%rbp,%r14,8) +0xf3,0x42,0x0f,0x38,0xd8,0x94,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide256kl 291(%r8,%rax,4) +0xf3,0x41,0x0f,0x38,0xd8,0x94,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide256kl (%rip) +0xf3,0x0f,0x38,0xd8,0x15,0x00,0x00,0x00,0x00 + +# CHECK: aesencwide256kl -2048(,%rbp,2) +0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesencwide256kl 8128(%rcx) +0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesencwide256kl -8192(%rdx) +0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff + diff --git a/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-intel.txt b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-intel.txt new file mode 100644 index 0000000000000..262c6185f85b0 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/KEYLOCKER/Keylocker-x86-64-intel.txt @@ -0,0 +1,223 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: aesdec128kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xdd,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xdd,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl xmm6, [rip] +0xf3,0x0f,0x38,0xdd,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec128kl xmm6, [2*rbp - 1536] +0xf3,0x0f,0x38,0xdd,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl xmm6, [rcx + 6096] +0xf3,0x0f,0x38,0xdd,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl xmm6, [rdx - 6144] +0xf3,0x0f,0x38,0xdd,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xdf,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xdf,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl xmm6, [rip] +0xf3,0x0f,0x38,0xdf,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec256kl xmm6, [2*rbp - 2048] +0xf3,0x0f,0x38,0xdf,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl xmm6, [rcx + 8128] +0xf3,0x0f,0x38,0xdf,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl xmm6, [rdx - 8192] +0xf3,0x0f,0x38,0xdf,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xdc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xdc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl xmm6, [rip] +0xf3,0x0f,0x38,0xdc,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc128kl xmm6, [2*rbp - 1536] +0xf3,0x0f,0x38,0xdc,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl xmm6, [rcx + 6096] +0xf3,0x0f,0x38,0xdc,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl xmm6, [rdx - 6144] +0xf3,0x0f,0x38,0xdc,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xde,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xde,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl xmm6, [rip] +0xf3,0x0f,0x38,0xde,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc256kl xmm6, [2*rbp - 2048] +0xf3,0x0f,0x38,0xde,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl xmm6, [rcx + 8128] +0xf3,0x0f,0x38,0xde,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl xmm6, [rdx - 8192] +0xf3,0x0f,0x38,0xde,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey xmm6, xmm7 +0xf3,0x0f,0x38,0xdc,0xf7 + +# CHECK: aesdec128kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xdd,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec128kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xdd,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec128kl xmm6, [rip] +0xf3,0x0f,0x38,0xdd,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec128kl xmm6, [2*rbp - 1536] +0xf3,0x0f,0x38,0xdd,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdec128kl xmm6, [rcx + 6096] +0xf3,0x0f,0x38,0xdd,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesdec128kl xmm6, [rdx - 6144] +0xf3,0x0f,0x38,0xdd,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesdec256kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xdf,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdec256kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xdf,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdec256kl xmm6, [rip] +0xf3,0x0f,0x38,0xdf,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesdec256kl xmm6, [2*rbp - 2048] +0xf3,0x0f,0x38,0xdf,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdec256kl xmm6, [rcx + 8128] +0xf3,0x0f,0x38,0xdf,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdec256kl xmm6, [rdx - 8192] +0xf3,0x0f,0x38,0xdf,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: aesenc128kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xdc,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc128kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xdc,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc128kl xmm6, [rip] +0xf3,0x0f,0x38,0xdc,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc128kl xmm6, [2*rbp - 1536] +0xf3,0x0f,0x38,0xdc,0x34,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesenc128kl xmm6, [rcx + 6096] +0xf3,0x0f,0x38,0xdc,0xb1,0xd0,0x17,0x00,0x00 + +# CHECK: aesenc128kl xmm6, [rdx - 6144] +0xf3,0x0f,0x38,0xdc,0xb2,0x00,0xe8,0xff,0xff + +# CHECK: aesenc256kl xmm6, [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xde,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesenc256kl xmm6, [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xde,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesenc256kl xmm6, [rip] +0xf3,0x0f,0x38,0xde,0x35,0x00,0x00,0x00,0x00 + +# CHECK: aesenc256kl xmm6, [2*rbp - 2048] +0xf3,0x0f,0x38,0xde,0x34,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesenc256kl xmm6, [rcx + 8128] +0xf3,0x0f,0x38,0xde,0xb1,0xc0,0x1f,0x00,0x00 + +# CHECK: aesenc256kl xmm6, [rdx - 8192] +0xf3,0x0f,0x38,0xde,0xb2,0x00,0xe0,0xff,0xff + +# CHECK: loadiwkey xmm6, xmm7 +0xf3,0x0f,0x38,0xdc,0xf7 + +# CHECK: aesdecwide128kl [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xd8,0x8c,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide128kl [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xd8,0x8c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide128kl [rip] +0xf3,0x0f,0x38,0xd8,0x0d,0x00,0x00,0x00,0x00 + +# CHECK: aesdecwide128kl [2*rbp - 1536] +0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesdecwide128kl [rcx + 6096] +0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00 + +# CHECK: aesdecwide128kl [rdx - 6144] +0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff + +# CHECK: aesdecwide256kl [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xd8,0x9c,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesdecwide256kl [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xd8,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesdecwide256kl [rip] +0xf3,0x0f,0x38,0xd8,0x1d,0x00,0x00,0x00,0x00 + +# CHECK: aesdecwide256kl [2*rbp - 2048] +0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesdecwide256kl [rcx + 8128] +0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00 + +# CHECK: aesdecwide256kl [rdx - 8192] +0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff + +# CHECK: aesencwide128kl [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xd8,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide128kl [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xd8,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide128kl [rip] +0xf3,0x0f,0x38,0xd8,0x05,0x00,0x00,0x00,0x00 + +# CHECK: aesencwide128kl [2*rbp - 1536] +0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff + +# CHECK: aesencwide128kl [rcx + 6096] +0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00 + +# CHECK: aesencwide128kl [rdx - 6144] +0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff + +# CHECK: aesencwide256kl [rbp + 8*r14 + 268435456] +0xf3,0x42,0x0f,0x38,0xd8,0x94,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: aesencwide256kl [r8 + 4*rax + 291] +0xf3,0x41,0x0f,0x38,0xd8,0x94,0x80,0x23,0x01,0x00,0x00 + +# CHECK: aesencwide256kl [rip] +0xf3,0x0f,0x38,0xd8,0x15,0x00,0x00,0x00,0x00 + +# CHECK: aesencwide256kl [2*rbp - 2048] +0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: aesencwide256kl [rcx + 8128] +0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00 + +# CHECK: aesencwide256kl [rdx - 8192] +0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff diff --git a/llvm/test/MC/X86/KEYLOCKER/keylocker-att.s b/llvm/test/MC/X86/KEYLOCKER/keylocker-att.s new file mode 100644 index 0000000000000..3352a2f5ec810 --- /dev/null +++ b/llvm/test/MC/X86/KEYLOCKER/keylocker-att.s @@ -0,0 +1,205 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: aesdec128kl 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x94,0xf4,0x00,0x00,0x00,0x10] + aesdec128kl 268435456(%esp,%esi,8), %xmm2 + +// CHECK: aesdec128kl 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x94,0x87,0x23,0x01,0x00,0x00] + aesdec128kl 291(%edi,%eax,4), %xmm2 + +// CHECK: aesdec128kl (%eax), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x10] + aesdec128kl (%eax), %xmm2 + +// CHECK: aesdec128kl -1536(,%ebp,2), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x14,0x6d,0x00,0xfa,0xff,0xff] + aesdec128kl -1536(,%ebp,2), %xmm2 + +// CHECK: aesdec128kl 6096(%ecx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x91,0xd0,0x17,0x00,0x00] + aesdec128kl 6096(%ecx), %xmm2 + +// CHECK: aesdec128kl -6144(%edx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x92,0x00,0xe8,0xff,0xff] + aesdec128kl -6144(%edx), %xmm2 + +// CHECK: aesdec256kl 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x94,0xf4,0x00,0x00,0x00,0x10] + aesdec256kl 268435456(%esp,%esi,8), %xmm2 + +// CHECK: aesdec256kl 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x94,0x87,0x23,0x01,0x00,0x00] + aesdec256kl 291(%edi,%eax,4), %xmm2 + +// CHECK: aesdec256kl (%eax), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x10] + aesdec256kl (%eax), %xmm2 + +// CHECK: aesdec256kl -2048(,%ebp,2), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesdec256kl -2048(,%ebp,2), %xmm2 + +// CHECK: aesdec256kl 8128(%ecx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x91,0xc0,0x1f,0x00,0x00] + aesdec256kl 8128(%ecx), %xmm2 + +// CHECK: aesdec256kl -8192(%edx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x92,0x00,0xe0,0xff,0xff] + aesdec256kl -8192(%edx), %xmm2 + +// CHECK: aesenc128kl 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x94,0xf4,0x00,0x00,0x00,0x10] + aesenc128kl 268435456(%esp,%esi,8), %xmm2 + +// CHECK: aesenc128kl 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x94,0x87,0x23,0x01,0x00,0x00] + aesenc128kl 291(%edi,%eax,4), %xmm2 + +// CHECK: aesenc128kl (%eax), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x10] + aesenc128kl (%eax), %xmm2 + +// CHECK: aesenc128kl -1536(,%ebp,2), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x14,0x6d,0x00,0xfa,0xff,0xff] + aesenc128kl -1536(,%ebp,2), %xmm2 + +// CHECK: aesenc128kl 6096(%ecx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x91,0xd0,0x17,0x00,0x00] + aesenc128kl 6096(%ecx), %xmm2 + +// CHECK: aesenc128kl -6144(%edx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x92,0x00,0xe8,0xff,0xff] + aesenc128kl -6144(%edx), %xmm2 + +// CHECK: aesenc256kl 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x94,0xf4,0x00,0x00,0x00,0x10] + aesenc256kl 268435456(%esp,%esi,8), %xmm2 + +// CHECK: aesenc256kl 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x94,0x87,0x23,0x01,0x00,0x00] + aesenc256kl 291(%edi,%eax,4), %xmm2 + +// CHECK: aesenc256kl (%eax), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x10] + aesenc256kl (%eax), %xmm2 + +// CHECK: aesenc256kl -2048(,%ebp,2), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesenc256kl -2048(,%ebp,2), %xmm2 + +// CHECK: aesenc256kl 8128(%ecx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x91,0xc0,0x1f,0x00,0x00] + aesenc256kl 8128(%ecx), %xmm2 + +// CHECK: aesenc256kl -8192(%edx), %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x92,0x00,0xe0,0xff,0xff] + aesenc256kl -8192(%edx), %xmm2 + +// CHECK: encodekey128 %ecx, %ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfa,0xc9] + encodekey128 %ecx, %ecx + +// CHECK: encodekey256 %ecx, %ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfb,0xc9] + encodekey256 %ecx, %ecx + +// CHECK: loadiwkey %xmm3, %xmm2 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xd3] + loadiwkey %xmm3, %xmm2 + +// CHECK: aesdecwide128kl 268435456(%esp,%esi,8) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8c,0xf4,0x00,0x00,0x00,0x10] + aesdecwide128kl 268435456(%esp,%esi,8) + +// CHECK: aesdecwide128kl 291(%edi,%eax,4) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8c,0x87,0x23,0x01,0x00,0x00] + aesdecwide128kl 291(%edi,%eax,4) + +// CHECK: aesdecwide128kl (%eax) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x08] + aesdecwide128kl (%eax) + +// CHECK: aesdecwide128kl -1536(,%ebp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff] + aesdecwide128kl -1536(,%ebp,2) + +// CHECK: aesdecwide128kl 6096(%ecx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00] + aesdecwide128kl 6096(%ecx) + +// CHECK: aesdecwide128kl -6144(%edx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff] + aesdecwide128kl -6144(%edx) + +// CHECK: aesdecwide256kl 268435456(%esp,%esi,8) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9c,0xf4,0x00,0x00,0x00,0x10] + aesdecwide256kl 268435456(%esp,%esi,8) + +// CHECK: aesdecwide256kl 291(%edi,%eax,4) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9c,0x87,0x23,0x01,0x00,0x00] + aesdecwide256kl 291(%edi,%eax,4) + +// CHECK: aesdecwide256kl (%eax) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x18] + aesdecwide256kl (%eax) + +// CHECK: aesdecwide256kl -2048(,%ebp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff] + aesdecwide256kl -2048(,%ebp,2) + +// CHECK: aesdecwide256kl 8128(%ecx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00] + aesdecwide256kl 8128(%ecx) + +// CHECK: aesdecwide256kl -8192(%edx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff] + aesdecwide256kl -8192(%edx) + +// CHECK: aesencwide128kl 268435456(%esp,%esi,8) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x84,0xf4,0x00,0x00,0x00,0x10] + aesencwide128kl 268435456(%esp,%esi,8) + +// CHECK: aesencwide128kl 291(%edi,%eax,4) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x84,0x87,0x23,0x01,0x00,0x00] + aesencwide128kl 291(%edi,%eax,4) + +// CHECK: aesencwide128kl (%eax) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x00] + aesencwide128kl (%eax) + +// CHECK: aesencwide128kl -1536(,%ebp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff] + aesencwide128kl -1536(,%ebp,2) + +// CHECK: aesencwide128kl 6096(%ecx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00] + aesencwide128kl 6096(%ecx) + +// CHECK: aesencwide128kl -6144(%edx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff] + aesencwide128kl -6144(%edx) + +// CHECK: aesencwide256kl 268435456(%esp,%esi,8) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x94,0xf4,0x00,0x00,0x00,0x10] + aesencwide256kl 268435456(%esp,%esi,8) + +// CHECK: aesencwide256kl 291(%edi,%eax,4) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x94,0x87,0x23,0x01,0x00,0x00] + aesencwide256kl 291(%edi,%eax,4) + +// CHECK: aesencwide256kl (%eax) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x10] + aesencwide256kl (%eax) + +// CHECK: aesencwide256kl -2048(,%ebp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesencwide256kl -2048(,%ebp,2) + +// CHECK: aesencwide256kl 8128(%ecx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00] + aesencwide256kl 8128(%ecx) + +// CHECK: aesencwide256kl -8192(%edx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff] + aesencwide256kl -8192(%edx) diff --git a/llvm/test/MC/X86/KEYLOCKER/keylocker-intel.s b/llvm/test/MC/X86/KEYLOCKER/keylocker-intel.s new file mode 100644 index 0000000000000..7eb1e0df8c559 --- /dev/null +++ b/llvm/test/MC/X86/KEYLOCKER/keylocker-intel.s @@ -0,0 +1,205 @@ +// RUN: llvm-mc -triple i386-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: aesdec128kl xmm2, [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x94,0xf4,0x00,0x00,0x00,0x10] + aesdec128kl xmm2, [esp + 8*esi + 268435456] + +// CHECK: aesdec128kl xmm2, [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x94,0x87,0x23,0x01,0x00,0x00] + aesdec128kl xmm2, [edi + 4*eax + 291] + +// CHECK: aesdec128kl xmm2, [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x10] + aesdec128kl xmm2, [eax] + +// CHECK: aesdec128kl xmm2, [2*ebp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x14,0x6d,0x00,0xfa,0xff,0xff] + aesdec128kl xmm2, [2*ebp - 1536] + +// CHECK: aesdec128kl xmm2, [ecx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x91,0xd0,0x17,0x00,0x00] + aesdec128kl xmm2, [ecx + 6096] + +// CHECK: aesdec128kl xmm2, [edx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x92,0x00,0xe8,0xff,0xff] + aesdec128kl xmm2, [edx - 6144] + +// CHECK: aesdec256kl xmm2, [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x94,0xf4,0x00,0x00,0x00,0x10] + aesdec256kl xmm2, [esp + 8*esi + 268435456] + +// CHECK: aesdec256kl xmm2, [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x94,0x87,0x23,0x01,0x00,0x00] + aesdec256kl xmm2, [edi + 4*eax + 291] + +// CHECK: aesdec256kl xmm2, [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x10] + aesdec256kl xmm2, [eax] + +// CHECK: aesdec256kl xmm2, [2*ebp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesdec256kl xmm2, [2*ebp - 2048] + +// CHECK: aesdec256kl xmm2, [ecx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x91,0xc0,0x1f,0x00,0x00] + aesdec256kl xmm2, [ecx + 8128] + +// CHECK: aesdec256kl xmm2, [edx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x92,0x00,0xe0,0xff,0xff] + aesdec256kl xmm2, [edx - 8192] + +// CHECK: aesenc128kl xmm2, [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x94,0xf4,0x00,0x00,0x00,0x10] + aesenc128kl xmm2, [esp + 8*esi + 268435456] + +// CHECK: aesenc128kl xmm2, [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x94,0x87,0x23,0x01,0x00,0x00] + aesenc128kl xmm2, [edi + 4*eax + 291] + +// CHECK: aesenc128kl xmm2, [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x10] + aesenc128kl xmm2, [eax] + +// CHECK: aesenc128kl xmm2, [2*ebp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x14,0x6d,0x00,0xfa,0xff,0xff] + aesenc128kl xmm2, [2*ebp - 1536] + +// CHECK: aesenc128kl xmm2, [ecx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x91,0xd0,0x17,0x00,0x00] + aesenc128kl xmm2, [ecx + 6096] + +// CHECK: aesenc128kl xmm2, [edx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x92,0x00,0xe8,0xff,0xff] + aesenc128kl xmm2, [edx - 6144] + +// CHECK: aesenc256kl xmm2, [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x94,0xf4,0x00,0x00,0x00,0x10] + aesenc256kl xmm2, [esp + 8*esi + 268435456] + +// CHECK: aesenc256kl xmm2, [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x94,0x87,0x23,0x01,0x00,0x00] + aesenc256kl xmm2, [edi + 4*eax + 291] + +// CHECK: aesenc256kl xmm2, [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x10] + aesenc256kl xmm2, [eax] + +// CHECK: aesenc256kl xmm2, [2*ebp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesenc256kl xmm2, [2*ebp - 2048] + +// CHECK: aesenc256kl xmm2, [ecx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x91,0xc0,0x1f,0x00,0x00] + aesenc256kl xmm2, [ecx + 8128] + +// CHECK: aesenc256kl xmm2, [edx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x92,0x00,0xe0,0xff,0xff] + aesenc256kl xmm2, [edx - 8192] + +// CHECK: encodekey128 ecx, ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfa,0xc9] + encodekey128 ecx, ecx + +// CHECK: encodekey256 ecx, ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfb,0xc9] + encodekey256 ecx, ecx + +// CHECK: loadiwkey xmm2, xmm3 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xd3] + loadiwkey xmm2, xmm3 + +// CHECK: aesdecwide128kl [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8c,0xf4,0x00,0x00,0x00,0x10] + aesdecwide128kl [esp + 8*esi + 268435456] + +// CHECK: aesdecwide128kl [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8c,0x87,0x23,0x01,0x00,0x00] + aesdecwide128kl [edi + 4*eax + 291] + +// CHECK: aesdecwide128kl [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x08] + aesdecwide128kl [eax] + +// CHECK: aesdecwide128kl [2*ebp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff] + aesdecwide128kl [2*ebp - 1536] + +// CHECK: aesdecwide128kl [ecx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00] + aesdecwide128kl [ecx + 6096] + +// CHECK: aesdecwide128kl [edx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff] + aesdecwide128kl [edx - 6144] + +// CHECK: aesdecwide256kl [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9c,0xf4,0x00,0x00,0x00,0x10] + aesdecwide256kl [esp + 8*esi + 268435456] + +// CHECK: aesdecwide256kl [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9c,0x87,0x23,0x01,0x00,0x00] + aesdecwide256kl [edi + 4*eax + 291] + +// CHECK: aesdecwide256kl [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x18] + aesdecwide256kl [eax] + +// CHECK: aesdecwide256kl [2*ebp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff] + aesdecwide256kl [2*ebp - 2048] + +// CHECK: aesdecwide256kl [ecx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00] + aesdecwide256kl [ecx + 8128] + +// CHECK: aesdecwide256kl [edx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff] + aesdecwide256kl [edx - 8192] + +// CHECK: aesencwide128kl [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x84,0xf4,0x00,0x00,0x00,0x10] + aesencwide128kl [esp + 8*esi + 268435456] + +// CHECK: aesencwide128kl [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x84,0x87,0x23,0x01,0x00,0x00] + aesencwide128kl [edi + 4*eax + 291] + +// CHECK: aesencwide128kl [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x00] + aesencwide128kl [eax] + +// CHECK: aesencwide128kl [2*ebp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff] + aesencwide128kl [2*ebp - 1536] + +// CHECK: aesencwide128kl [ecx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00] + aesencwide128kl [ecx + 6096] + +// CHECK: aesencwide128kl [edx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff] + aesencwide128kl [edx - 6144] + +// CHECK: aesencwide256kl [esp + 8*esi + 268435456] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x94,0xf4,0x00,0x00,0x00,0x10] + aesencwide256kl [esp + 8*esi + 268435456] + +// CHECK: aesencwide256kl [edi + 4*eax + 291] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x94,0x87,0x23,0x01,0x00,0x00] + aesencwide256kl [edi + 4*eax + 291] + +// CHECK: aesencwide256kl [eax] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x10] + aesencwide256kl [eax] + +// CHECK: aesencwide256kl [2*ebp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesencwide256kl [2*ebp - 2048] + +// CHECK: aesencwide256kl [ecx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00] + aesencwide256kl [ecx + 8128] + +// CHECK: aesencwide256kl [edx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff] + aesencwide256kl [edx - 8192] diff --git a/llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-att.s b/llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-att.s new file mode 100644 index 0000000000000..dc467d76c2872 --- /dev/null +++ b/llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-att.s @@ -0,0 +1,205 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: aesdec128kl 268435456(%rbp,%r14,8), %xmm6 +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xdd,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesdec128kl 268435456(%rbp,%r14,8), %xmm6 + +// CHECK: aesdec128kl 291(%r8,%rax,4), %xmm6 +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xdd,0xb4,0x80,0x23,0x01,0x00,0x00] + aesdec128kl 291(%r8,%rax,4), %xmm6 + +// CHECK: aesdec128kl (%rip), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x35,0x00,0x00,0x00,0x00] + aesdec128kl (%rip), %xmm6 + +// CHECK: aesdec128kl -1536(,%rbp,2), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x34,0x6d,0x00,0xfa,0xff,0xff] + aesdec128kl -1536(,%rbp,2), %xmm6 + +// CHECK: aesdec128kl 6096(%rcx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0xb1,0xd0,0x17,0x00,0x00] + aesdec128kl 6096(%rcx), %xmm6 + +// CHECK: aesdec128kl -6144(%rdx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0xb2,0x00,0xe8,0xff,0xff] + aesdec128kl -6144(%rdx), %xmm6 + +// CHECK: aesdec256kl 268435456(%rbp,%r14,8), %xmm6 +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xdf,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesdec256kl 268435456(%rbp,%r14,8), %xmm6 + +// CHECK: aesdec256kl 291(%r8,%rax,4), %xmm6 +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xdf,0xb4,0x80,0x23,0x01,0x00,0x00] + aesdec256kl 291(%r8,%rax,4), %xmm6 + +// CHECK: aesdec256kl (%rip), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x35,0x00,0x00,0x00,0x00] + aesdec256kl (%rip), %xmm6 + +// CHECK: aesdec256kl -2048(,%rbp,2), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x34,0x6d,0x00,0xf8,0xff,0xff] + aesdec256kl -2048(,%rbp,2), %xmm6 + +// CHECK: aesdec256kl 8128(%rcx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0xb1,0xc0,0x1f,0x00,0x00] + aesdec256kl 8128(%rcx), %xmm6 + +// CHECK: aesdec256kl -8192(%rdx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0xb2,0x00,0xe0,0xff,0xff] + aesdec256kl -8192(%rdx), %xmm6 + +// CHECK: aesenc128kl 268435456(%rbp,%r14,8), %xmm6 +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xdc,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesenc128kl 268435456(%rbp,%r14,8), %xmm6 + +// CHECK: aesenc128kl 291(%r8,%rax,4), %xmm6 +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xdc,0xb4,0x80,0x23,0x01,0x00,0x00] + aesenc128kl 291(%r8,%rax,4), %xmm6 + +// CHECK: aesenc128kl (%rip), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x35,0x00,0x00,0x00,0x00] + aesenc128kl (%rip), %xmm6 + +// CHECK: aesenc128kl -1536(,%rbp,2), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x34,0x6d,0x00,0xfa,0xff,0xff] + aesenc128kl -1536(,%rbp,2), %xmm6 + +// CHECK: aesenc128kl 6096(%rcx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xb1,0xd0,0x17,0x00,0x00] + aesenc128kl 6096(%rcx), %xmm6 + +// CHECK: aesenc128kl -6144(%rdx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xb2,0x00,0xe8,0xff,0xff] + aesenc128kl -6144(%rdx), %xmm6 + +// CHECK: aesenc256kl 268435456(%rbp,%r14,8), %xmm6 +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xde,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesenc256kl 268435456(%rbp,%r14,8), %xmm6 + +// CHECK: aesenc256kl 291(%r8,%rax,4), %xmm6 +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xde,0xb4,0x80,0x23,0x01,0x00,0x00] + aesenc256kl 291(%r8,%rax,4), %xmm6 + +// CHECK: aesenc256kl (%rip), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x35,0x00,0x00,0x00,0x00] + aesenc256kl (%rip), %xmm6 + +// CHECK: aesenc256kl -2048(,%rbp,2), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x34,0x6d,0x00,0xf8,0xff,0xff] + aesenc256kl -2048(,%rbp,2), %xmm6 + +// CHECK: aesenc256kl 8128(%rcx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0xb1,0xc0,0x1f,0x00,0x00] + aesenc256kl 8128(%rcx), %xmm6 + +// CHECK: aesenc256kl -8192(%rdx), %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0xb2,0x00,0xe0,0xff,0xff] + aesenc256kl -8192(%rdx), %xmm6 + +// CHECK: encodekey128 %ecx, %ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfa,0xc9] + encodekey128 %ecx, %ecx + +// CHECK: encodekey256 %ecx, %ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfb,0xc9] + encodekey256 %ecx, %ecx + +// CHECK: loadiwkey %xmm7, %xmm6 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xf7] + loadiwkey %xmm7, %xmm6 + +// CHECK: aesdecwide128kl 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x8c,0xf5,0x00,0x00,0x00,0x10] + aesdecwide128kl 268435456(%rbp,%r14,8) + +// CHECK: aesdecwide128kl 291(%r8,%rax,4) +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x8c,0x80,0x23,0x01,0x00,0x00] + aesdecwide128kl 291(%r8,%rax,4) + +// CHECK: aesdecwide128kl (%rip) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x0d,0x00,0x00,0x00,0x00] + aesdecwide128kl (%rip) + +// CHECK: aesdecwide128kl -1536(,%rbp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff] + aesdecwide128kl -1536(,%rbp,2) + +// CHECK: aesdecwide128kl 6096(%rcx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00] + aesdecwide128kl 6096(%rcx) + +// CHECK: aesdecwide128kl -6144(%rdx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff] + aesdecwide128kl -6144(%rdx) + +// CHECK: aesdecwide256kl 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x9c,0xf5,0x00,0x00,0x00,0x10] + aesdecwide256kl 268435456(%rbp,%r14,8) + +// CHECK: aesdecwide256kl 291(%r8,%rax,4) +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x9c,0x80,0x23,0x01,0x00,0x00] + aesdecwide256kl 291(%r8,%rax,4) + +// CHECK: aesdecwide256kl (%rip) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x1d,0x00,0x00,0x00,0x00] + aesdecwide256kl (%rip) + +// CHECK: aesdecwide256kl -2048(,%rbp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff] + aesdecwide256kl -2048(,%rbp,2) + +// CHECK: aesdecwide256kl 8128(%rcx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00] + aesdecwide256kl 8128(%rcx) + +// CHECK: aesdecwide256kl -8192(%rdx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff] + aesdecwide256kl -8192(%rdx) + +// CHECK: aesencwide128kl 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x84,0xf5,0x00,0x00,0x00,0x10] + aesencwide128kl 268435456(%rbp,%r14,8) + +// CHECK: aesencwide128kl 291(%r8,%rax,4) +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x84,0x80,0x23,0x01,0x00,0x00] + aesencwide128kl 291(%r8,%rax,4) + +// CHECK: aesencwide128kl (%rip) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x05,0x00,0x00,0x00,0x00] + aesencwide128kl (%rip) + +// CHECK: aesencwide128kl -1536(,%rbp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff] + aesencwide128kl -1536(,%rbp,2) + +// CHECK: aesencwide128kl 6096(%rcx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00] + aesencwide128kl 6096(%rcx) + +// CHECK: aesencwide128kl -6144(%rdx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff] + aesencwide128kl -6144(%rdx) + +// CHECK: aesencwide256kl 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x94,0xf5,0x00,0x00,0x00,0x10] + aesencwide256kl 268435456(%rbp,%r14,8) + +// CHECK: aesencwide256kl 291(%r8,%rax,4) +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x94,0x80,0x23,0x01,0x00,0x00] + aesencwide256kl 291(%r8,%rax,4) + +// CHECK: aesencwide256kl (%rip) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x15,0x00,0x00,0x00,0x00] + aesencwide256kl (%rip) + +// CHECK: aesencwide256kl -2048(,%rbp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesencwide256kl -2048(,%rbp,2) + +// CHECK: aesencwide256kl 8128(%rcx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00] + aesencwide256kl 8128(%rcx) + +// CHECK: aesencwide256kl -8192(%rdx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff] + aesencwide256kl -8192(%rdx) diff --git a/llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-intel.s b/llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-intel.s new file mode 100644 index 0000000000000..cb8921acdc1ff --- /dev/null +++ b/llvm/test/MC/X86/KEYLOCKER/x86-64-keylocker-intel.s @@ -0,0 +1,205 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: aesdec128kl xmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xdd,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesdec128kl xmm6, [rbp + 8*r14 + 268435456] + +// CHECK: aesdec128kl xmm6, [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xdd,0xb4,0x80,0x23,0x01,0x00,0x00] + aesdec128kl xmm6, [r8 + 4*rax + 291] + +// CHECK: aesdec128kl xmm6, [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x35,0x00,0x00,0x00,0x00] + aesdec128kl xmm6, [rip] + +// CHECK: aesdec128kl xmm6, [2*rbp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0x34,0x6d,0x00,0xfa,0xff,0xff] + aesdec128kl xmm6, [2*rbp - 1536] + +// CHECK: aesdec128kl xmm6, [rcx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0xb1,0xd0,0x17,0x00,0x00] + aesdec128kl xmm6, [rcx + 6096] + +// CHECK: aesdec128kl xmm6, [rdx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdd,0xb2,0x00,0xe8,0xff,0xff] + aesdec128kl xmm6, [rdx - 6144] + +// CHECK: aesdec256kl xmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xdf,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesdec256kl xmm6, [rbp + 8*r14 + 268435456] + +// CHECK: aesdec256kl xmm6, [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xdf,0xb4,0x80,0x23,0x01,0x00,0x00] + aesdec256kl xmm6, [r8 + 4*rax + 291] + +// CHECK: aesdec256kl xmm6, [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x35,0x00,0x00,0x00,0x00] + aesdec256kl xmm6, [rip] + +// CHECK: aesdec256kl xmm6, [2*rbp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0x34,0x6d,0x00,0xf8,0xff,0xff] + aesdec256kl xmm6, [2*rbp - 2048] + +// CHECK: aesdec256kl xmm6, [rcx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0xb1,0xc0,0x1f,0x00,0x00] + aesdec256kl xmm6, [rcx + 8128] + +// CHECK: aesdec256kl xmm6, [rdx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdf,0xb2,0x00,0xe0,0xff,0xff] + aesdec256kl xmm6, [rdx - 8192] + +// CHECK: aesenc128kl xmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xdc,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesenc128kl xmm6, [rbp + 8*r14 + 268435456] + +// CHECK: aesenc128kl xmm6, [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xdc,0xb4,0x80,0x23,0x01,0x00,0x00] + aesenc128kl xmm6, [r8 + 4*rax + 291] + +// CHECK: aesenc128kl xmm6, [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x35,0x00,0x00,0x00,0x00] + aesenc128kl xmm6, [rip] + +// CHECK: aesenc128kl xmm6, [2*rbp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0x34,0x6d,0x00,0xfa,0xff,0xff] + aesenc128kl xmm6, [2*rbp - 1536] + +// CHECK: aesenc128kl xmm6, [rcx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xb1,0xd0,0x17,0x00,0x00] + aesenc128kl xmm6, [rcx + 6096] + +// CHECK: aesenc128kl xmm6, [rdx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xb2,0x00,0xe8,0xff,0xff] + aesenc128kl xmm6, [rdx - 6144] + +// CHECK: aesenc256kl xmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xde,0xb4,0xf5,0x00,0x00,0x00,0x10] + aesenc256kl xmm6, [rbp + 8*r14 + 268435456] + +// CHECK: aesenc256kl xmm6, [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xde,0xb4,0x80,0x23,0x01,0x00,0x00] + aesenc256kl xmm6, [r8 + 4*rax + 291] + +// CHECK: aesenc256kl xmm6, [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x35,0x00,0x00,0x00,0x00] + aesenc256kl xmm6, [rip] + +// CHECK: aesenc256kl xmm6, [2*rbp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0x34,0x6d,0x00,0xf8,0xff,0xff] + aesenc256kl xmm6, [2*rbp - 2048] + +// CHECK: aesenc256kl xmm6, [rcx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0xb1,0xc0,0x1f,0x00,0x00] + aesenc256kl xmm6, [rcx + 8128] + +// CHECK: aesenc256kl xmm6, [rdx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xde,0xb2,0x00,0xe0,0xff,0xff] + aesenc256kl xmm6, [rdx - 8192] + +// CHECK: encodekey128 ecx, ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfa,0xc9] + encodekey128 ecx, ecx + +// CHECK: encodekey256 ecx, ecx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfb,0xc9] + encodekey256 ecx, ecx + +// CHECK: loadiwkey xmm6, xmm7 +// CHECK: encoding: [0xf3,0x0f,0x38,0xdc,0xf7] + loadiwkey xmm6, xmm7 + +// CHECK: aesdecwide128kl [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x8c,0xf5,0x00,0x00,0x00,0x10] + aesdecwide128kl [rbp + 8*r14 + 268435456] + +// CHECK: aesdecwide128kl [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x8c,0x80,0x23,0x01,0x00,0x00] + aesdecwide128kl [r8 + 4*rax + 291] + +// CHECK: aesdecwide128kl [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x0d,0x00,0x00,0x00,0x00] + aesdecwide128kl [rip] + +// CHECK: aesdecwide128kl [2*rbp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x0c,0x6d,0x00,0xfa,0xff,0xff] + aesdecwide128kl [2*rbp - 1536] + +// CHECK: aesdecwide128kl [rcx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x89,0xd0,0x17,0x00,0x00] + aesdecwide128kl [rcx + 6096] + +// CHECK: aesdecwide128kl [rdx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x8a,0x00,0xe8,0xff,0xff] + aesdecwide128kl [rdx - 6144] + +// CHECK: aesdecwide256kl [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x9c,0xf5,0x00,0x00,0x00,0x10] + aesdecwide256kl [rbp + 8*r14 + 268435456] + +// CHECK: aesdecwide256kl [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x9c,0x80,0x23,0x01,0x00,0x00] + aesdecwide256kl [r8 + 4*rax + 291] + +// CHECK: aesdecwide256kl [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x1d,0x00,0x00,0x00,0x00] + aesdecwide256kl [rip] + +// CHECK: aesdecwide256kl [2*rbp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x1c,0x6d,0x00,0xf8,0xff,0xff] + aesdecwide256kl [2*rbp - 2048] + +// CHECK: aesdecwide256kl [rcx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x99,0xc0,0x1f,0x00,0x00] + aesdecwide256kl [rcx + 8128] + +// CHECK: aesdecwide256kl [rdx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x9a,0x00,0xe0,0xff,0xff] + aesdecwide256kl [rdx - 8192] + +// CHECK: aesencwide128kl [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x84,0xf5,0x00,0x00,0x00,0x10] + aesencwide128kl [rbp + 8*r14 + 268435456] + +// CHECK: aesencwide128kl [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x84,0x80,0x23,0x01,0x00,0x00] + aesencwide128kl [r8 + 4*rax + 291] + +// CHECK: aesencwide128kl [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x05,0x00,0x00,0x00,0x00] + aesencwide128kl [rip] + +// CHECK: aesencwide128kl [2*rbp - 1536] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x04,0x6d,0x00,0xfa,0xff,0xff] + aesencwide128kl [2*rbp - 1536] + +// CHECK: aesencwide128kl [rcx + 6096] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x81,0xd0,0x17,0x00,0x00] + aesencwide128kl [rcx + 6096] + +// CHECK: aesencwide128kl [rdx - 6144] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x82,0x00,0xe8,0xff,0xff] + aesencwide128kl [rdx - 6144] + +// CHECK: aesencwide256kl [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xd8,0x94,0xf5,0x00,0x00,0x00,0x10] + aesencwide256kl [rbp + 8*r14 + 268435456] + +// CHECK: aesencwide256kl [r8 + 4*rax + 291] +// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xd8,0x94,0x80,0x23,0x01,0x00,0x00] + aesencwide256kl [r8 + 4*rax + 291] + +// CHECK: aesencwide256kl [rip] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x15,0x00,0x00,0x00,0x00] + aesencwide256kl [rip] + +// CHECK: aesencwide256kl [2*rbp - 2048] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x14,0x6d,0x00,0xf8,0xff,0xff] + aesencwide256kl [2*rbp - 2048] + +// CHECK: aesencwide256kl [rcx + 8128] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x91,0xc0,0x1f,0x00,0x00] + aesencwide256kl [rcx + 8128] + +// CHECK: aesencwide256kl [rdx - 8192] +// CHECK: encoding: [0xf3,0x0f,0x38,0xd8,0x92,0x00,0xe0,0xff,0xff] + aesencwide256kl [rdx - 8192] diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 4e368fac2c834..6a8a60d00639d 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -246,7 +246,8 @@ enum IIT_Info { IIT_SUBDIVIDE4_ARG = 45, IIT_VEC_OF_BITCASTS_TO_INT = 46, IIT_V128 = 47, - IIT_BF16 = 48 + IIT_BF16 = 48, + IIT_STRUCT9 = 49 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -469,6 +470,7 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int, case 6: TypeSig.push_back(IIT_STRUCT6); break; case 7: TypeSig.push_back(IIT_STRUCT7); break; case 8: TypeSig.push_back(IIT_STRUCT8); break; + case 9: TypeSig.push_back(IIT_STRUCT9); break; default: llvm_unreachable("Unhandled case in struct"); } From e39d7884a1f5c5c7136ba2e493e9ac313ccc78ed Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 30 Sep 2020 10:09:34 +0000 Subject: [PATCH 138/544] [gn build] Port 413577a8790 --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index c43e531fc7180..811faf52b1831 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -143,6 +143,8 @@ copy("Headers") { "inttypes.h", "invpcidintrin.h", "iso646.h", + "keylocker_wide_intrin.h", + "keylockerintrin.h", "limits.h", "lwpintrin.h", "lzcntintrin.h", From ec3f24d4538d1c262377331c7b35ea66e023cf98 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 11:13:54 +0100 Subject: [PATCH 139/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - assert for correct bit providence indices. NFCI. As suggested by @spatel on D88316 --- llvm/lib/Transforms/Utils/Local.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 61f4dffb641ca..8ff11ba4cab47 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3027,6 +3027,9 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( if (!Res) return false; auto &BitProvenance = Res->Provenance; + assert(all_of(BitProvenance, + [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) && + "Illegal bit provenance index"); // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. From af47d40b9c68744eb66aa2ef779065e946aaa099 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 12:07:19 +0100 Subject: [PATCH 140/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - recognise zext(bswap(trunc(x))) patterns (PR39793) PR39793 demonstrated an issue where we fail to recognize 'partial' bswap patterns of the lower bytes of an integer source. In fact, most of this is already in place collectBitParts suitably tags zero bits, so we just need to correctly handle this case by finding the zero'd upper bits and reducing the bswap pattern just to the active demanded bits. Differential Revision: https://reviews.llvm.org/D88316 --- llvm/lib/Transforms/Utils/Local.cpp | 21 +++--- llvm/test/Transforms/InstCombine/bswap.ll | 82 +++++++++++------------ 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 8ff11ba4cab47..4eb458d217e02 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3010,29 +3010,34 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( IntegerType *ITy = dyn_cast(I->getType()); if (!ITy || ITy->getBitWidth() > 128) return false; // Can't do vectors or integers > 128 bits. - unsigned BW = ITy->getBitWidth(); - unsigned DemandedBW = BW; IntegerType *DemandedTy = ITy; - if (I->hasOneUse()) { - if (TruncInst *Trunc = dyn_cast(I->user_back())) { + if (I->hasOneUse()) + if (auto *Trunc = dyn_cast(I->user_back())) DemandedTy = cast(Trunc->getType()); - DemandedBW = DemandedTy->getBitWidth(); - } - } // Try to find all the pieces corresponding to the bswap. std::map> BPS; auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); if (!Res) return false; - auto &BitProvenance = Res->Provenance; + ArrayRef BitProvenance = Res->Provenance; assert(all_of(BitProvenance, [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) && "Illegal bit provenance index"); + // If the upper bits are zero, then attempt to perform as a truncated op. + if (BitProvenance[BitProvenance.size() - 1] == BitPart::Unset) { + while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) + BitProvenance = BitProvenance.drop_back(); + if (BitProvenance.empty()) + return false; // TODO - handle null value? + DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size()); + } + // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. + unsigned DemandedBW = DemandedTy->getBitWidth(); bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; for (unsigned i = 0; i < DemandedBW; ++i) { OKForBSwap &= diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 41d3c5b58c2f4..5f9a8078f5415 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -187,8 +187,8 @@ define i32 @bswap32_shl_first_extra_use(i32 %x) { define i16 @test8(i16 %a) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) -; CHECK-NEXT: ret i16 [[REV]] +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] ; %conv = zext i16 %a to i32 %shr = lshr i16 %a, 8 @@ -201,8 +201,8 @@ define i16 @test8(i16 %a) { define i16 @test9(i16 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) -; CHECK-NEXT: ret i16 [[REV]] +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] ; %conv = zext i16 %a to i32 %shr = lshr i32 %conv, 8 @@ -229,18 +229,10 @@ define i16 @test10(i32 %a) { define i64 @PR39793_bswap_u64_as_u32(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u32( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24 -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 16711680 -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 4278190080 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]] -; CHECK-NEXT: ret i64 [[TMP12]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[REV:%.*]] = call i32 @llvm.bswap.i32(i32 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[REV]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 24 %3 = and i64 %2, 255 @@ -258,13 +250,10 @@ define i64 @PR39793_bswap_u64_as_u32(i64 %0) { define i16 @PR39793_bswap_u64_as_u32_trunc(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u32_trunc( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24 -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i16 -; CHECK-NEXT: ret i16 [[TMP7]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[REV:%.*]] = call i32 @llvm.bswap.i32(i32 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[REV]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] ; %2 = lshr i64 %0, 24 %3 = and i64 %2, 255 @@ -283,12 +272,10 @@ define i16 @PR39793_bswap_u64_as_u32_trunc(i64 %0) { define i64 @PR39793_bswap_u64_as_u16(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u16( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 8 %3 = and i64 %2, 255 @@ -300,9 +287,9 @@ define i64 @PR39793_bswap_u64_as_u16(i64 %0) { define i8 @PR39793_bswap_u64_as_u16_trunc(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u16_trunc( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i8 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[REV1:%.*]] = lshr i64 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[REV1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %2 = lshr i64 %0, 8 %3 = and i64 %2, 255 @@ -313,14 +300,27 @@ define i8 @PR39793_bswap_u64_as_u16_trunc(i64 %0) { ret i8 %7 } +define i50 @PR39793_bswap_u50_as_u16(i50 %0) { +; CHECK-LABEL: @PR39793_bswap_u50_as_u16( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i50 [[TMP0:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i50 +; CHECK-NEXT: ret i50 [[TMP2]] +; + %2 = lshr i50 %0, 8 + %3 = and i50 %2, 255 + %4 = shl i50 %0, 8 + %5 = and i50 %4, 65280 + %6 = or i50 %3, %5 + ret i50 %6 +} + define i32 @PR39793_bswap_u32_as_u16(i32 %0) { ; CHECK-LABEL: @PR39793_bswap_u32_as_u16( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP3]], [[TMP5]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[TMP0:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; %2 = lshr i32 %0, 8 %3 = and i32 %2, 255 @@ -332,9 +332,9 @@ define i32 @PR39793_bswap_u32_as_u16(i32 %0) { define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { ; CHECK-LABEL: @PR39793_bswap_u32_as_u16_trunc( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[REV1:%.*]] = lshr i32 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[REV1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %2 = lshr i32 %0, 8 %3 = and i32 %2, 255 From 14088a6f5d1ae597960833a366beb9acee8d65cb Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Wed, 30 Sep 2020 07:42:43 +0000 Subject: [PATCH 141/544] [mlir] Added support for rank reducing subviews This commit adds support for subviews which enable to reduce resulting rank by dropping static dimensions of size 1. Differential Revision: https://reviews.llvm.org/D88534 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 14 +++ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 99 ++++++++++++++++++- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 3 - mlir/test/IR/core-ops.mlir | 29 ++++++ mlir/test/IR/invalid-ops.mlir | 10 ++ 5 files changed, 147 insertions(+), 8 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 352b7d8fd3d69..ff1a82c265614 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2841,6 +2841,20 @@ def SubViewOp : Std_Op<"subview", [ "ArrayRef attrs = {}">, // Build a SubViewOp with all dynamic entries. OpBuilder< + "OpBuilder &b, OperationState &result, Value source, " + "ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}">, + // Build a SubViewOp with mixed static and dynamic entries + // and custom result type. + OpBuilder< + "OpBuilder &b, OperationState &result, MemRefType resultType, " + "Value source, ArrayRef staticOffsets, " + "ArrayRef staticSizes, ArrayRef staticStrides, " + "ValueRange offsets, ValueRange sizes, " + "ValueRange strides, ArrayRef attrs = {}">, + // Build a SubViewOp with all dynamic entries and custom result type. + OpBuilder< + "OpBuilder &b, OperationState &result, MemRefType resultType, " "Value source, ValueRange offsets, ValueRange sizes, ValueRange strides, " "ArrayRef attrs = {}"> ]; diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index c0dc87210a3f1..1cabf172b7fcc 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2728,15 +2728,47 @@ void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source, staticStridesVector, offsets, sizes, strides, attrs); } +/// Build a SubViewOp as above but with custom result type. +void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides, ValueRange offsets, + ValueRange sizes, ValueRange strides, + ArrayRef attrs) { + build(b, result, resultType, source, offsets, sizes, strides, + b.getI64ArrayAttr(staticOffsets), b.getI64ArrayAttr(staticSizes), + b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +/// Build a SubViewOp as above but with custom result type. +void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, + ValueRange offsets, ValueRange sizes, + ValueRange strides, + ArrayRef attrs) { + auto sourceMemRefType = source.getType().cast(); + unsigned rank = sourceMemRefType.getRank(); + SmallVector staticOffsetsVector; + staticOffsetsVector.assign(rank, ShapedType::kDynamicStrideOrOffset); + SmallVector staticSizesVector; + staticSizesVector.assign(rank, ShapedType::kDynamicSize); + SmallVector staticStridesVector; + staticStridesVector.assign(rank, ShapedType::kDynamicStrideOrOffset); + build(b, result, resultType, source, staticOffsetsVector, staticSizesVector, + staticStridesVector, offsets, sizes, strides, attrs); +} + /// Verify that a particular offset/size/stride static attribute is well-formed. static LogicalResult verifySubViewOpPart(SubViewOp op, StringRef name, StringRef attrName, ArrayAttr attr, llvm::function_ref isDynamic, ValueRange values) { /// Check static and dynamic offsets/sizes/strides breakdown. - if (attr.size() != op.getRank()) - return op.emitError("expected ") - << op.getRank() << " " << name << " values"; + size_t inputRank = op.source().getType().cast().getRank(); + if (attr.size() != inputRank) + return op.emitError("expected ") << inputRank << " " << name << " values"; unsigned expectedNumDynamicEntries = llvm::count_if(attr.getValue(), [&](Attribute attr) { return isDynamic(attr.cast().getInt()); @@ -2755,6 +2787,62 @@ static SmallVector extractFromI64ArrayAttr(Attribute attr) { })); } +/// Checks if `original` MemRef type can be rank reduced to `reduced` type. +/// This function is slight variant of `is subsequence` algorithm where +/// not matching dimension must be 1. +static bool isRankReducedType(Type originalType, Type reducedType) { + if (originalType == reducedType) + return true; + + MemRefType original = originalType.cast(); + MemRefType reduced = reducedType.cast(); + ArrayRef originalShape = original.getShape(); + ArrayRef reducedShape = reduced.getShape(); + unsigned originalRank = originalShape.size(), + reducedRank = reducedShape.size(); + if (reducedRank > originalRank) + return false; + + unsigned reducedIdx = 0; + SmallVector keepMask(originalRank); + for (unsigned originalIdx = 0; originalIdx < originalRank; ++originalIdx) { + // -2 is never used as a dim size so it will never match. + int reducedVal = reducedIdx < reducedRank ? reducedShape[reducedIdx] : -2; + // Skip matching dims greedily. + if ((keepMask[originalIdx] = originalShape[originalIdx] == reducedVal)) + reducedIdx++; + // 1 is the only non-matching allowed. + else if (originalShape[originalIdx] != 1) + return false; + } + // Must match the reduced rank. + if (reducedIdx != reducedRank) + return false; + + MLIRContext *c = original.getContext(); + int64_t originalOffset, symCounter = 0, dimCounter = 0; + SmallVector originalStrides; + getStridesAndOffset(original, originalStrides, originalOffset); + auto getSymbolOrConstant = [&](int64_t offset) { + return offset == ShapedType::kDynamicStrideOrOffset + ? getAffineSymbolExpr(symCounter++, c) + : getAffineConstantExpr(offset, c); + }; + + AffineExpr expr = getSymbolOrConstant(originalOffset); + for (unsigned i = 0, e = originalStrides.size(); i < e; i++) { + if (keepMask[i]) + expr = expr + getSymbolOrConstant(originalStrides[i]) * + getAffineDimExpr(dimCounter++, c); + } + + auto reducedMap = AffineMap::get(dimCounter, symCounter, expr, c); + return original.getElementType() == reduced.getElementType() && + original.getMemorySpace() == reduced.getMemorySpace() && + (reduced.getAffineMaps().empty() || + reducedMap == reduced.getAffineMaps().front()); +} + /// Verifier for SubViewOp. static LogicalResult verify(SubViewOp op) { auto baseType = op.getBaseMemRefType().cast(); @@ -2790,8 +2878,9 @@ static LogicalResult verify(SubViewOp op) { op.getBaseMemRefType(), extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); - if (op.getType() != expectedType) - return op.emitError("expected result type to be ") << expectedType; + if (!isRankReducedType(expectedType, subViewType)) + return op.emitError("expected result type to be ") + << expectedType << " or a rank-reduced version."; return success(); } diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 332bfbe2f4577..5bf7857a66e8f 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -2107,9 +2107,6 @@ LogicalResult mlir::vector::splitFullAndPartialTransferPrecondition( // TODO: expand support to these 2 cases. if (!xferOp.permutation_map().isMinorIdentity()) return failure(); - // TODO: relax this precondition. This will require rank-reducing subviews. - if (xferOp.getMemRefType().getRank() != xferOp.getTransferRank()) - return failure(); // Must have some masked dimension to be a candidate for splitting. if (!xferOp.hasMaskedDim()) return failure(); diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index f182936c87032..5e3959af29ddc 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -19,6 +19,8 @@ // CHECK-DAG: #[[$SUBVIEW_MAP3:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2 + 8)> // CHECK-DAG: #[[$SUBVIEW_MAP4:map[0-9]+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> // CHECK-DAG: #[[$SUBVIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1 * 2)> +// CHECK-DAG: #[[$SUBVIEW_MAP6:map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0 * 36 + d1 * 36 + d2 * 4 + d3 * 4 + d4)> +// CHECK-DAG: #[[$SUBVIEW_MAP7:map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4 + d4 * s5 + d5 * s6)> // CHECK-LABEL: func @func_with_ops // CHECK-SAME: %[[ARG:.*]]: f32 @@ -797,6 +799,33 @@ func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %11 = subview %9[%arg1, %arg2][4, 4][2, 2] : memref<16x4xf32> to memref<4x4xf32, offset: ?, strides:[8, 2]> + %12 = alloc() : memref<1x9x1x4x1xf32, affine_map<(d0, d1, d2, d3, d4) -> (36 * d0 + 36 * d1 + 4 * d2 + 4 * d3 + d4)>> + // CHECK: subview %12[%arg1, %arg1, %arg1, %arg1, %arg1] + // CHECK-SAME: [1, 9, 1, 4, 1] [%arg2, %arg2, %arg2, %arg2, %arg2] : + // CHECK-SAME: memref<1x9x1x4x1xf32, #[[$SUBVIEW_MAP6]]> to memref<9x4xf32, #[[$SUBVIEW_MAP2]]> + %13 = subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, offset: 0, strides: [36, 36, 4, 4, 1]> to memref<9x4xf32, offset: ?, strides: [?, ?]> + // CHECK: subview %12[%arg1, %arg1, %arg1, %arg1, %arg1] + // CHECK-SAME: [1, 9, 1, 4, 1] [%arg2, %arg2, %arg2, %arg2, %arg2] : + // CHECK-SAME: memref<1x9x1x4x1xf32, #[[$SUBVIEW_MAP6]]> to memref<1x9x4xf32, #[[$BASE_MAP3]]> + %14 = subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, offset: 0, strides: [36, 36, 4, 4, 1]> to memref<1x9x4xf32, offset: ?, strides: [?, ?, ?]> + + %15 = alloc(%arg1, %arg2)[%c0, %c1, %arg1, %arg0, %arg0, %arg2, %arg2] : memref<1x?x5x1x?x1xf32, affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (s0 + s1 * d0 + s2 * d1 + s3 * d2 + s4 * d3 + s5 * d4 + s6 * d5)>> + // CHECK: subview %15[0, 0, 0, 0, 0, 0] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : + // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref + %16 = subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + // CHECK: subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : + // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref + %17 = subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + + %18 = alloc() : memref<1x8xf32> + // CHECK: subview %18[0, 0] [1, 8] [1, 1] : memref<1x8xf32> to memref<8xf32> + %19 = subview %18[0, 0][1, 8][1, 1] : memref<1x8xf32> to memref<8xf32> + + %20 = alloc() : memref<8x16x4xf32> + // CHECK: subview %20[0, 0, 0] [1, 16, 4] [1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> + %21 = subview %20[0, 0, 0][1, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> + + %22 = subview %20[3, 4, 2][1, 6, 3][1, 1, 1] : memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index e02dbca494df6..ab18845bdb532 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -1020,6 +1020,16 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { // ----- +func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) { + %0 = alloc() : memref<8x16x4xf32> + // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>>'}} + %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] + : memref<8x16x4xf32> to memref<16x4xf32> + return +} + +// ----- + func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) { // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 32 + d2 * 2)>>' are cast incompatible}} %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]> From 3cbd01ddb9372b725dcea3dd5fed21ef5b3d9578 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 12:14:39 +0100 Subject: [PATCH 142/544] [NFC][ARM] Add more LowOverheadLoop tests. --- .../biquad-cascade-default.mir | 396 ++++++++++++++++++ .../biquad-cascade-optsize-strd-lr.mir | 392 +++++++++++++++++ .../biquad-cascade-optsize.mir | 396 ++++++++++++++++++ 3 files changed, 1184 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir new file mode 100644 index 0000000000000..3c37c4a14b717 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir @@ -0,0 +1,396 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 } + + define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) { + bb: + %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32** + %i4 = load i32*, i32** %i, align 4 + %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1 + %i6 = load i32*, i32** %i5, align 4 + %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2 + %i8 = load i32, i32* %i7, align 4 + %i9 = sub i32 31, %i8 + %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3 + %i11 = load i32, i32* %i10, align 4 + br label %bb12 + + bb12: ; preds = %bb74, %bb + %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ] + %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ] + %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ] + %i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ] + %i18 = getelementptr inbounds i32, i32* %i13, i32 5 + %i19 = load i32, i32* %i14, align 4 + %i20 = getelementptr inbounds i32, i32* %i14, i32 1 + %i21 = load i32, i32* %i20, align 4 + %i22 = getelementptr inbounds i32, i32* %i14, i32 2 + %i23 = load i32, i32* %i22, align 4 + %i24 = getelementptr inbounds i32, i32* %i14, i32 3 + %i25 = load i32, i32* %i24, align 4 + %i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3) + br i1 %i26, label %bb27, label %bb74 + + bb27: ; preds = %bb12 + %i28 = getelementptr inbounds i32, i32* %i13, i32 4 + %i29 = load i32, i32* %i28, align 4 + %i30 = getelementptr inbounds i32, i32* %i13, i32 3 + %i31 = load i32, i32* %i30, align 4 + %i32 = getelementptr inbounds i32, i32* %i13, i32 2 + %i33 = load i32, i32* %i32, align 4 + %i34 = getelementptr inbounds i32, i32* %i13, i32 1 + %i35 = load i32, i32* %i34, align 4 + %i36 = load i32, i32* %i13, align 4 + br label %bb37 + + bb37: ; preds = %bb37, %bb27 + %lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ] + %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ] + %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ] + %i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ] + %i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ] + %i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ] + %i43 = phi i32 [ %i19, %bb27 ], [ %i52, %bb37 ] + %i45 = sext i32 %i29 to i64 + %i46 = sext i32 %i31 to i64 + %i47 = sext i32 %i33 to i64 + %i48 = sext i32 %i35 to i64 + %i49 = sext i32 %i36 to i64 + %i50 = zext i32 %i9 to i64 + %i51 = getelementptr inbounds i32, i32* %i38, i32 1 + %i52 = load i32, i32* %i38, align 4 + %i53 = sext i32 %i52 to i64 + %i54 = mul nsw i64 %i53, %i49 + %i55 = sext i32 %i43 to i64 + %i56 = mul nsw i64 %i55, %i48 + %i57 = sext i32 %i42 to i64 + %i58 = mul nsw i64 %i57, %i47 + %i59 = sext i32 %i41 to i64 + %i60 = mul nsw i64 %i59, %i46 + %i61 = sext i32 %i40 to i64 + %i62 = mul nsw i64 %i61, %i45 + %i63 = add i64 %i58, %i56 + %i64 = add i64 %i63, %i60 + %i65 = add i64 %i64, %i62 + %i66 = add i64 %i65, %i54 + %i67 = ashr i64 %i66, %i50 + %i68 = trunc i64 %i67 to i32 + %i69 = getelementptr inbounds i32, i32* %i39, i32 1 + store i32 %i68, i32* %i39, align 4 + %i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) + %i71 = icmp ne i32 %i70, 0 + %lsr.iv.next = add i32 %lsr.iv, -1 + br i1 %i71, label %bb37, label %bb72 + + bb72: ; preds = %bb37 + %i73 = trunc i64 %i67 to i32 + br label %bb74 + + bb74: ; preds = %bb72, %bb12 + %i75 = phi i32 [ %i19, %bb12 ], [ %i52, %bb72 ] + %i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ] + %i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ] + %i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ] + store i32 %i75, i32* %i14, align 4 + %i79 = bitcast i32* %i14 to i8* + %i80 = getelementptr inbounds i8, i8* %i79, i32 4 + %i81 = bitcast i8* %i80 to i32* + store i32 %i76, i32* %i81, align 4 + %i82 = bitcast i32* %i14 to i8* + %i83 = getelementptr inbounds i8, i8* %i82, i32 8 + %i84 = bitcast i8* %i83 to i32* + store i32 %i77, i32* %i84, align 4 + %i85 = getelementptr inbounds i32, i32* %i14, i32 4 + %i86 = bitcast i32* %i14 to i8* + %i87 = getelementptr inbounds i8, i8* %i86, i32 12 + %i88 = bitcast i8* %i87 to i32* + store i32 %i78, i32* %i88, align 4 + %i89 = add i32 %i16, -1 + %i90 = icmp eq i32 %i89, 0 + br i1 %i90, label %bb91, label %bb12 + + bb91: ; preds = %bb74 + ret void + } + + declare i1 @llvm.test.set.loop.iterations.i32(i32) + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) + +... +--- +name: arm_biquad_cascade_df1_q31 +alignment: 2 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + stackSize: 76 + offsetAdjustment: 0 + maxAlignment: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -40, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -44, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -48, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -52, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -56, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -60, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -64, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -68, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -72, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -76, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_biquad_cascade_df1_q31 + ; CHECK: bb.0.bb: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r11, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 + ; CHECK: $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 76 + ; CHECK: $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + ; CHECK: $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg + ; CHECK: $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + ; CHECK: renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + ; CHECK: $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.1.bb74 (align 4): + ; CHECK: successors: %bb.6(0x04000000), %bb.2(0x7c000000) + ; CHECK: liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 + ; CHECK: renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg + ; CHECK: t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) + ; CHECK: t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg + ; CHECK: renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg + ; CHECK: $r1 = tMOVr $r12, 14 /* CC::al */, $noreg + ; CHECK: tBcc %bb.6, 0 /* CC::eq */, killed $cpsr + ; CHECK: bb.2.bb12: + ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $r1, $r2, $r3, $r5, $r7, $r8, $r12 + ; CHECK: $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) + ; CHECK: $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + ; CHECK: t2CMPri renamable $r8, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.1, 0 /* CC::eq */, killed $cpsr + ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.3.bb27: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 + ; CHECK: t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) + ; CHECK: tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) + ; CHECK: tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) + ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) + ; CHECK: t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) + ; CHECK: renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + ; CHECK: bb.4.bb37 (align 4): + ; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 + ; CHECK: $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + ; CHECK: renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + ; CHECK: $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: $lr = tMOVr $r8, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + ; CHECK: dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r0 = tMOVr $r7, 14 /* CC::al */, $noreg + ; CHECK: $r4 = tMOVr $r5, 14 /* CC::al */, $noreg + ; CHECK: tBcc %bb.4, 1 /* CC::ne */, killed $cpsr + ; CHECK: tB %bb.5, 14 /* CC::al */, $noreg + ; CHECK: bb.5.bb72: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r2, $r5, $r6, $r7, $r9 + ; CHECK: $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg + ; CHECK: $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg + ; CHECK: $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg + ; CHECK: $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) + ; CHECK: tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + ; CHECK: tB %bb.1, 14 /* CC::al */, $noreg + ; CHECK: bb.6.bb91: + ; CHECK: $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + bb.0.bb: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 36 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r11, -8 + frame-setup CFI_INSTRUCTION offset $r10, -12 + frame-setup CFI_INSTRUCTION offset $r9, -16 + frame-setup CFI_INSTRUCTION offset $r8, -20 + frame-setup CFI_INSTRUCTION offset $r7, -24 + frame-setup CFI_INSTRUCTION offset $r6, -28 + frame-setup CFI_INSTRUCTION offset $r5, -32 + frame-setup CFI_INSTRUCTION offset $r4, -36 + $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 76 + $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg + $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg + t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg + renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + tB %bb.2, 14 /* CC::al */, $noreg + + bb.1.bb74 (align 4): + successors: %bb.6(0x04000000), %bb.2(0x7c000000) + liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 + + renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg + t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) + t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg + renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg + $r1 = tMOVr $r12, 14 /* CC::al */, $noreg + tBcc %bb.6, 0 /* CC::eq */, killed $cpsr + + bb.2.bb12: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $r1, $r3, $r5, $r7, $r8, $r12, $r2 + + $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) + $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + t2WhileLoopStart renamable $r8, %bb.1, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg + + bb.3.bb27: + successors: %bb.4(0x80000000) + liveins: $r0, $r1, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 + + t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) + renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) + tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) + tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) + tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) + t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) + renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + + bb.4.bb37 (align 4): + successors: %bb.4(0x7c000000), %bb.5(0x04000000) + liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 + + $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg + renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + $lr = tMOVr $r8, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg + early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + renamable $lr = t2LoopDec killed renamable $lr, 1 + renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg + $r0 = tMOVr $r7, 14 /* CC::al */, $noreg + $r4 = tMOVr $r5, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.4, implicit-def dead $cpsr + tB %bb.5, 14 /* CC::al */, $noreg + + bb.5.bb72: + successors: %bb.1(0x80000000) + liveins: $r5, $r6, $r7, $r9, $r2 + + $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg + $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg + $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg + $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) + tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + tB %bb.1, 14 /* CC::al */, $noreg + + bb.6.bb91: + $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir new file mode 100644 index 0000000000000..a847b69c26143 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir @@ -0,0 +1,392 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - -verify-machineinstrs | FileCheck %s +--- | + %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 } + + ; Function Attrs: optsize + define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) #0 { + bb: + %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32** + %i4 = load i32*, i32** %i, align 4 + %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1 + %i6 = load i32*, i32** %i5, align 4 + %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2 + %i8 = load i32, i32* %i7, align 4 + %i9 = sub i32 31, %i8 + %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3 + %i11 = load i32, i32* %i10, align 4 + br label %bb12 + + bb12: ; preds = %bb74, %bb + %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ] + %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ] + %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ] + %i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ] + %i18 = getelementptr inbounds i32, i32* %i13, i32 5 + %i19 = load i32, i32* %i14, align 4 + %i20 = getelementptr inbounds i32, i32* %i14, i32 1 + %i21 = load i32, i32* %i20, align 4 + %i22 = getelementptr inbounds i32, i32* %i14, i32 2 + %i23 = load i32, i32* %i22, align 4 + %i24 = getelementptr inbounds i32, i32* %i14, i32 3 + %i25 = load i32, i32* %i24, align 4 + %i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3) + br i1 %i26, label %bb27, label %bb74 + + bb27: ; preds = %bb12 + %i28 = getelementptr inbounds i32, i32* %i13, i32 4 + %i29 = load i32, i32* %i28, align 4 + %i30 = getelementptr inbounds i32, i32* %i13, i32 3 + %i31 = load i32, i32* %i30, align 4 + %i32 = getelementptr inbounds i32, i32* %i13, i32 2 + %i33 = load i32, i32* %i32, align 4 + %i34 = getelementptr inbounds i32, i32* %i13, i32 1 + %i35 = load i32, i32* %i34, align 4 + %i36 = load i32, i32* %i13, align 4 + br label %bb37 + + bb37: ; preds = %bb37, %bb27 + %lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ] + %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ] + %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ] + %i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ] + %i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ] + %i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ] + %i43 = phi i32 [ %i19, %bb27 ], [ %i52, %bb37 ] + %i45 = sext i32 %i29 to i64 + %i46 = sext i32 %i31 to i64 + %i47 = sext i32 %i33 to i64 + %i48 = sext i32 %i35 to i64 + %i49 = sext i32 %i36 to i64 + %i50 = zext i32 %i9 to i64 + %i51 = getelementptr inbounds i32, i32* %i38, i32 1 + %i52 = load i32, i32* %i38, align 4 + %i53 = sext i32 %i52 to i64 + %i54 = mul nsw i64 %i53, %i49 + %i55 = sext i32 %i43 to i64 + %i56 = mul nsw i64 %i55, %i48 + %i57 = sext i32 %i42 to i64 + %i58 = mul nsw i64 %i57, %i47 + %i59 = sext i32 %i41 to i64 + %i60 = mul nsw i64 %i59, %i46 + %i61 = sext i32 %i40 to i64 + %i62 = mul nsw i64 %i61, %i45 + %i63 = add i64 %i58, %i56 + %i64 = add i64 %i63, %i60 + %i65 = add i64 %i64, %i62 + %i66 = add i64 %i65, %i54 + %i67 = ashr i64 %i66, %i50 + %i68 = trunc i64 %i67 to i32 + %i69 = getelementptr inbounds i32, i32* %i39, i32 1 + store i32 %i68, i32* %i39, align 4 + %i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) + %i71 = icmp ne i32 %i70, 0 + %lsr.iv.next = add i32 %lsr.iv, -1 + br i1 %i71, label %bb37, label %bb72 + + bb72: ; preds = %bb37 + %i73 = trunc i64 %i67 to i32 + br label %bb74 + + bb74: ; preds = %bb72, %bb12 + %i75 = phi i32 [ %i19, %bb12 ], [ %i52, %bb72 ] + %i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ] + %i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ] + %i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ] + store i32 %i75, i32* %i14, align 4 + %i79 = bitcast i32* %i14 to i8* + %i80 = getelementptr inbounds i8, i8* %i79, i32 4 + %i81 = bitcast i8* %i80 to i32* + store i32 %i76, i32* %i81, align 4 + %i82 = bitcast i32* %i14 to i8* + %i83 = getelementptr inbounds i8, i8* %i82, i32 8 + %i84 = bitcast i8* %i83 to i32* + store i32 %i77, i32* %i84, align 4 + %i85 = getelementptr inbounds i32, i32* %i14, i32 4 + %i86 = bitcast i32* %i14 to i8* + %i87 = getelementptr inbounds i8, i8* %i86, i32 12 + %i88 = bitcast i8* %i87 to i32* + store i32 %i78, i32* %i88, align 4 + %i89 = add i32 %i16, -1 + %i90 = icmp eq i32 %i89, 0 + br i1 %i90, label %bb91, label %bb12 + + bb91: ; preds = %bb74 + ret void + } + + declare i1 @llvm.test.set.loop.iterations.i32(i32) #1 + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #1 + + attributes #0 = { optsize "target-cpu"="cortex-m55" } + attributes #1 = { noduplicate nounwind "target-cpu"="cortex-m55" } + +... +--- +name: arm_biquad_cascade_df1_q31 +alignment: 2 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + stackSize: 76 + offsetAdjustment: 0 + maxAlignment: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -40, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -44, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -48, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -52, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -56, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -60, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -64, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -68, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -72, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -76, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_biquad_cascade_df1_q31 + ; CHECK: bb.0.bb: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r11, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 + ; CHECK: $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 76 + ; CHECK: $r7, $r5 = t2LDRDi8 $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + ; CHECK: $r6, $r4 = t2LDRDi8 killed $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + ; CHECK: renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r3 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + ; CHECK: bb.1.bb12 (align 4): + ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: liveins: $r1, $r2, $r3, $r4, $r5, $r7 + ; CHECK: $r9, $r8 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) + ; CHECK: dead renamable $lr = nuw t2ADDri renamable $r5, 20, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r6, $r12 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + ; CHECK: $lr = t2WLS renamable $r3, %bb.5 + ; CHECK: bb.2.bb27: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $lr, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 + ; CHECK: t2STRDi8 killed $lr, killed $r7, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) + ; CHECK: renamable $r0 = tLDRi renamable $r5, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) + ; CHECK: renamable $r10 = t2LDRi12 renamable $r5, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + ; CHECK: tSTRspi killed renamable $r0, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: renamable $r0 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) + ; CHECK: tSTRspi killed renamable $r4, $sp, 5, 14 /* CC::al */, $noreg :: (store 4 into %stack.4) + ; CHECK: tSTRspi killed renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + ; CHECK: renamable $r0 = tLDRi renamable $r5, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) + ; CHECK: tSTRspi killed renamable $r0, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: renamable $r0 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) + ; CHECK: tSTRspi killed renamable $r0, $sp, 6, 14 /* CC::al */, $noreg :: (store 4 into %stack.3) + ; CHECK: $r0 = tMOVr killed $r3, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + ; CHECK: bb.3.bb37 (align 4): + ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r6, $r8, $r9, $r10, $r12 + ; CHECK: renamable $r4 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + ; CHECK: $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg + ; CHECK: $r5 = tMOVr $r9, 14 /* CC::al */, $noreg + ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMULL killed $r9, killed renamable $r4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + ; CHECK: $r8 = tMOVr $r5, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r12, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: $r12 = tMOVr $r7, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r3, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r2 = t2STR_POST renamable $r6, killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.3 + ; CHECK: bb.4.bb72: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $r5, $r6, $r7, $r9 + ; CHECK: $r12 = tMOVr killed $r7, 14 /* CC::al */, $noreg + ; CHECK: $r7, $r4 = t2LDRDi8 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %stack.5), (load 4 from %stack.4) + ; CHECK: $lr = t2ADDri $sp, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r8 = tMOVr killed $r5, 14 /* CC::al */, $noreg + ; CHECK: t2LDMIA killed $lr, 14 /* CC::al */, $noreg, def $r2, def $r3, def $lr :: (load 4 from %stack.8), (load 4 from %stack.7), (load 4 from %stack.6) + ; CHECK: bb.5.bb74: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r2, $r3, $r4, $r6, $r7, $r8, $r9, $r12 + ; CHECK: t2STRDi8 killed $r9, killed $r8, $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) + ; CHECK: t2STRDi8 killed $r6, killed $r12, $r7, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + ; CHECK: renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 16, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4, $cpsr = tSUBi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg + ; CHECK: $r5 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: $r1 = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: $sp = frame-destroy tADDspi $sp, 10, 0 /* CC::eq */, $cpsr, implicit $itstate + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 0 /* CC::eq */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc, implicit $sp, implicit killed $r4, implicit killed $r5, implicit killed $r7, implicit killed $itstate + ; CHECK: tB %bb.1, 14 /* CC::al */, $noreg + bb.0.bb: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 36 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r11, -8 + frame-setup CFI_INSTRUCTION offset $r10, -12 + frame-setup CFI_INSTRUCTION offset $r9, -16 + frame-setup CFI_INSTRUCTION offset $r8, -20 + frame-setup CFI_INSTRUCTION offset $r7, -24 + frame-setup CFI_INSTRUCTION offset $r6, -28 + frame-setup CFI_INSTRUCTION offset $r5, -32 + frame-setup CFI_INSTRUCTION offset $r4, -36 + $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 76 + $r7, $r5 = t2LDRDi8 $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + $r6, $r4 = t2LDRDi8 killed $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg + t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r3 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + + bb.1.bb12 (align 4): + successors: %bb.2(0x40000000), %bb.5(0x40000000) + liveins: $r1, $r2, $r3, $r4, $r5, $r7 + + $r9, $r8 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) + renamable $lr = nuw t2ADDri renamable $r5, 20, 14 /* CC::al */, $noreg, $noreg + $r6, $r12 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + t2WhileLoopStart renamable $r3, %bb.5, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.bb27: + successors: %bb.3(0x80000000) + liveins: $lr, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 + + t2STRDi8 killed $lr, killed $r7, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) + renamable $r0 = tLDRi renamable $r5, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) + renamable $r10 = t2LDRi12 renamable $r5, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + tSTRspi killed renamable $r0, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + renamable $r0 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) + tSTRspi killed renamable $r4, $sp, 5, 14 /* CC::al */, $noreg :: (store 4 into %stack.4) + tSTRspi killed renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + renamable $r0 = tLDRi renamable $r5, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) + tSTRspi killed renamable $r0, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + renamable $r0 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) + tSTRspi killed renamable $r0, $sp, 6, 14 /* CC::al */, $noreg :: (store 4 into %stack.3) + $r0 = tMOVr killed $r3, 14 /* CC::al */, $noreg + renamable $r3 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + + bb.3.bb37 (align 4): + successors: %bb.3(0x7c000000), %bb.4(0x04000000) + liveins: $r0, $r1, $r2, $r3, $r6, $r8, $r9, $r10, $r12 + + renamable $r4 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg + $r5 = tMOVr $r9, 14 /* CC::al */, $noreg + $lr = tMOVr $r0, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMULL killed $r9, killed renamable $r4, 14 /* CC::al */, $noreg + renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + renamable $lr = t2LoopDec killed renamable $lr, 1 + renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + $r8 = tMOVr $r5, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + renamable $r6, renamable $r11 = t2SMLAL killed renamable $r12, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + $r12 = tMOVr $r7, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r3, 14 /* CC::al */, $noreg + early-clobber renamable $r2 = t2STR_POST renamable $r6, killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + t2LoopEnd killed renamable $lr, %bb.3, implicit-def dead $cpsr + tB %bb.4, 14 /* CC::al */, $noreg + + bb.4.bb72: + successors: %bb.5(0x80000000) + liveins: $r5, $r6, $r7, $r9 + + $r12 = tMOVr killed $r7, 14 /* CC::al */, $noreg + $r7, $r4 = t2LDRDi8 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %stack.5), (load 4 from %stack.4) + $lr = t2ADDri $sp, 4, 14 /* CC::al */, $noreg, $noreg + $r8 = tMOVr killed $r5, 14 /* CC::al */, $noreg + t2LDMIA killed $lr, 14 /* CC::al */, $noreg, def $r2, def $r3, def $lr :: (load 4 from %stack.8), (load 4 from %stack.7), (load 4 from %stack.6) + + bb.5.bb74: + successors: %bb.1(0x7c000000) + liveins: $lr, $r2, $r3, $r4, $r6, $r7, $r8, $r9, $r12 + + t2STRDi8 killed $r9, killed $r8, $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) + t2STRDi8 killed $r6, killed $r12, $r7, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 16, 14 /* CC::al */, $noreg + renamable $r4, $cpsr = tSUBi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg + $r5 = tMOVr killed $lr, 14 /* CC::al */, $noreg + $r1 = tMOVr $r2, 14 /* CC::al */, $noreg + t2IT 0, 4, implicit-def $itstate + $sp = frame-destroy tADDspi $sp, 10, 0 /* CC::eq */, $cpsr, implicit $itstate + $sp = frame-destroy t2LDMIA_RET $sp, 0 /* CC::eq */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc, implicit $sp, implicit killed $r4, implicit killed $r5, implicit killed $r7, implicit killed $itstate + tB %bb.1, 14 /* CC::al */, $noreg + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir new file mode 100644 index 0000000000000..f9b625c8141e7 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir @@ -0,0 +1,396 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 } + + ; Function Attrs: optsize + define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) #0 { + bb: + %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32** + %i4 = load i32*, i32** %i, align 4 + %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1 + %i6 = load i32*, i32** %i5, align 4 + %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2 + %i8 = load i32, i32* %i7, align 4 + %i9 = sub i32 31, %i8 + %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3 + %i11 = load i32, i32* %i10, align 4 + br label %bb12 + + bb12: ; preds = %bb74, %bb + %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ] + %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ] + %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ] + %i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ] + %i18 = getelementptr inbounds i32, i32* %i13, i32 5 + %i19 = load i32, i32* %i14, align 4 + %i20 = getelementptr inbounds i32, i32* %i14, i32 1 + %i21 = load i32, i32* %i20, align 4 + %i22 = getelementptr inbounds i32, i32* %i14, i32 2 + %i23 = load i32, i32* %i22, align 4 + %i24 = getelementptr inbounds i32, i32* %i14, i32 3 + %i25 = load i32, i32* %i24, align 4 + %i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3) + br i1 %i26, label %bb27, label %bb74 + + bb27: ; preds = %bb12 + %i28 = getelementptr inbounds i32, i32* %i13, i32 4 + %i29 = load i32, i32* %i28, align 4 + %i30 = getelementptr inbounds i32, i32* %i13, i32 3 + %i31 = load i32, i32* %i30, align 4 + %i32 = getelementptr inbounds i32, i32* %i13, i32 2 + %i33 = load i32, i32* %i32, align 4 + %i34 = getelementptr inbounds i32, i32* %i13, i32 1 + %i35 = load i32, i32* %i34, align 4 + %i36 = load i32, i32* %i13, align 4 + br label %bb37 + + bb37: ; preds = %bb37, %bb27 + %lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ] + %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ] + %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ] + %i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ] + %i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ] + %i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ] + %i43 = phi i32 [ %i19, %bb27 ], [ %i52, %bb37 ] + %i45 = sext i32 %i29 to i64 + %i46 = sext i32 %i31 to i64 + %i47 = sext i32 %i33 to i64 + %i48 = sext i32 %i35 to i64 + %i49 = sext i32 %i36 to i64 + %i50 = zext i32 %i9 to i64 + %i51 = getelementptr inbounds i32, i32* %i38, i32 1 + %i52 = load i32, i32* %i38, align 4 + %i53 = sext i32 %i52 to i64 + %i54 = mul nsw i64 %i53, %i49 + %i55 = sext i32 %i43 to i64 + %i56 = mul nsw i64 %i55, %i48 + %i57 = sext i32 %i42 to i64 + %i58 = mul nsw i64 %i57, %i47 + %i59 = sext i32 %i41 to i64 + %i60 = mul nsw i64 %i59, %i46 + %i61 = sext i32 %i40 to i64 + %i62 = mul nsw i64 %i61, %i45 + %i63 = add i64 %i58, %i56 + %i64 = add i64 %i63, %i60 + %i65 = add i64 %i64, %i62 + %i66 = add i64 %i65, %i54 + %i67 = ashr i64 %i66, %i50 + %i68 = trunc i64 %i67 to i32 + %i69 = getelementptr inbounds i32, i32* %i39, i32 1 + store i32 %i68, i32* %i39, align 4 + %i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) + %i71 = icmp ne i32 %i70, 0 + %lsr.iv.next = add i32 %lsr.iv, -1 + br i1 %i71, label %bb37, label %bb72 + + bb72: ; preds = %bb37 + %i73 = trunc i64 %i67 to i32 + br label %bb74 + + bb74: ; preds = %bb72, %bb12 + %i75 = phi i32 [ %i19, %bb12 ], [ %i52, %bb72 ] + %i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ] + %i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ] + %i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ] + store i32 %i75, i32* %i14, align 4 + %i79 = bitcast i32* %i14 to i8* + %i80 = getelementptr inbounds i8, i8* %i79, i32 4 + %i81 = bitcast i8* %i80 to i32* + store i32 %i76, i32* %i81, align 4 + %i82 = bitcast i32* %i14 to i8* + %i83 = getelementptr inbounds i8, i8* %i82, i32 8 + %i84 = bitcast i8* %i83 to i32* + store i32 %i77, i32* %i84, align 4 + %i85 = getelementptr inbounds i32, i32* %i14, i32 4 + %i86 = bitcast i32* %i14 to i8* + %i87 = getelementptr inbounds i8, i8* %i86, i32 12 + %i88 = bitcast i8* %i87 to i32* + store i32 %i78, i32* %i88, align 4 + %i89 = add i32 %i16, -1 + %i90 = icmp eq i32 %i89, 0 + br i1 %i90, label %bb91, label %bb12 + + bb91: ; preds = %bb74 + ret void + } + + ; Function Attrs: noduplicate nounwind + declare i1 @llvm.test.set.loop.iterations.i32(i32) #1 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #1 + + attributes #0 = { optsize "target-cpu"="cortex-m55" } + attributes #1 = { noduplicate nounwind "target-cpu"="cortex-m55" } + +... +--- +name: arm_biquad_cascade_df1_q31 +alignment: 2 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + stackSize: 76 + offsetAdjustment: 0 + maxAlignment: 4 + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -40, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -44, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -48, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -52, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -56, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -60, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -64, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -68, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -72, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -76, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 11, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 12, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 13, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 14, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 15, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 16, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 17, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 18, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_biquad_cascade_df1_q31 + ; CHECK: bb.0.bb: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r11, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 + ; CHECK: $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 76 + ; CHECK: $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + ; CHECK: $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg + ; CHECK: $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + ; CHECK: renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + ; CHECK: $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + ; CHECK: bb.1.bb12 (align 4): + ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: liveins: $r1, $r2, $r3, $r5, $r7, $r8, $r12 + ; CHECK: $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) + ; CHECK: $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + ; CHECK: dead $lr = t2WLS renamable $r8, %bb.5 + ; CHECK: bb.2.bb27: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 + ; CHECK: t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) + ; CHECK: tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) + ; CHECK: tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) + ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) + ; CHECK: t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) + ; CHECK: renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + ; CHECK: bb.3.bb37 (align 4): + ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 + ; CHECK: $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + ; CHECK: renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + ; CHECK: $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: $lr = tMOVr $r8, 14 /* CC::al */, $noreg + ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + ; CHECK: renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r0 = tMOVr $r7, 14 /* CC::al */, $noreg + ; CHECK: $r4 = tMOVr $r5, 14 /* CC::al */, $noreg + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.3 + ; CHECK: bb.4.bb72: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $r2, $r5, $r6, $r7, $r9 + ; CHECK: $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg + ; CHECK: $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg + ; CHECK: $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg + ; CHECK: $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) + ; CHECK: tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + ; CHECK: bb.5.bb74: + ; CHECK: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; CHECK: liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 + ; CHECK: renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg + ; CHECK: t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) + ; CHECK: t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg + ; CHECK: renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg + ; CHECK: $r1 = tMOVr $r12, 14 /* CC::al */, $noreg + ; CHECK: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr + ; CHECK: bb.6.bb91: + ; CHECK: $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg + ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + bb.0.bb: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 36 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r11, -8 + frame-setup CFI_INSTRUCTION offset $r10, -12 + frame-setup CFI_INSTRUCTION offset $r9, -16 + frame-setup CFI_INSTRUCTION offset $r8, -20 + frame-setup CFI_INSTRUCTION offset $r7, -24 + frame-setup CFI_INSTRUCTION offset $r6, -28 + frame-setup CFI_INSTRUCTION offset $r5, -32 + frame-setup CFI_INSTRUCTION offset $r4, -36 + $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 76 + $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg + $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg + t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg + renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + + bb.1.bb12 (align 4): + successors: %bb.2(0x40000000), %bb.5(0x40000000) + liveins: $r1, $r3, $r5, $r7, $r8, $r12, $r2 + + $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) + $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + t2WhileLoopStart renamable $r8, %bb.5, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.bb27: + successors: %bb.3(0x80000000) + liveins: $r0, $r1, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 + + t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) + renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) + tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) + tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) + tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) + t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) + renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + + bb.3.bb37 (align 4): + successors: %bb.3(0x7c000000), %bb.4(0x04000000) + liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 + + $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg + renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + $lr = tMOVr $r8, 14 /* CC::al */, $noreg + renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg + early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg + early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + renamable $lr = t2LoopDec killed renamable $lr, 1 + renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg + $r0 = tMOVr $r7, 14 /* CC::al */, $noreg + $r4 = tMOVr $r5, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.3, implicit-def dead $cpsr + tB %bb.4, 14 /* CC::al */, $noreg + + bb.4.bb72: + successors: %bb.5(0x80000000) + liveins: $r5, $r6, $r7, $r9, $r2 + + $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg + $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg + $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg + $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) + tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + + bb.5.bb74: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 + + renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg + t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) + t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg + renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg + $r1 = tMOVr $r12, 14 /* CC::al */, $noreg + tBcc %bb.1, 1 /* CC::ne */, killed $cpsr + + bb.6.bb91: + $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc + +... From 0b17d4754a94b7129c2483762acd586783802b12 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Wed, 30 Sep 2020 07:13:59 +0000 Subject: [PATCH 143/544] [mlir][Linalg] Tile sizes for Conv ops vectorization added as pass arguments Current setup for conv op vectorization does not enable user to specify tile sizes as well as dimensions for vectorization. In this commit we change that by adding tile sizes as pass arguments. Every dimension with corresponding tile size > 1 is automatically vectorized. Differential Revision: https://reviews.llvm.org/D88533 --- .../Dialect/Linalg/Transforms/Transforms.h | 11 ++-- .../Dialect/Linalg/CPU/test-conv-1d-call.mlir | 4 +- .../Linalg/CPU/test-conv-1d-ncw-call.mlir | 4 +- .../Linalg/CPU/test-conv-1d-nwc-call.mlir | 4 +- .../Dialect/Linalg/CPU/test-conv-2d-call.mlir | 4 +- .../Linalg/CPU/test-conv-2d-nchw-call.mlir | 4 +- .../Linalg/CPU/test-conv-2d-nhwc-call.mlir | 4 +- .../Dialect/Linalg/CPU/test-conv-3d-call.mlir | 4 +- .../Linalg/CPU/test-conv-3d-ncdhw-call.mlir | 4 +- .../Linalg/CPU/test-conv-3d-ndhwc-call.mlir | 4 +- .../Linalg/Transforms/Vectorization.cpp | 51 +++++++++---------- .../LinalgToVector/linalg-to-vector.mlir | 2 +- .../lib/Transforms/TestConvVectorization.cpp | 13 ++++- 13 files changed, 59 insertions(+), 54 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index b188fde5d801a..00a094d720767 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -32,7 +32,8 @@ struct TiledLinalgOp { /// Populates patterns for vectorization of all ConvN-D ops. void populateConvVectorizationPatterns( - MLIRContext *context, SmallVectorImpl &patterns); + MLIRContext *context, SmallVectorImpl &patterns, + ArrayRef tileSizes); /// Performs standalone tiling of a single LinalgOp by `tileSizes`. /// and permute the loop nest according to `interchangeVector` @@ -549,8 +550,8 @@ struct AffineMinSCFCanonicalizationPattern /// false of size 1. This ensures that the ConvOp can be lowered to vector /// contraction of dimensions marked in the *mask* as true. /// -/// A good example is ConvNHWCOp which is 2D Conv op with channels as the last -/// dimension. For this op we contract last 3 dimensions. +/// A good example for vectorization is ConvNHWCOp which is 2D Conv op +/// with channels as the last dimension. Let's vectorize last 3 dimensions. /// The initial op definition looks like this: /// ``` /// linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : @@ -589,10 +590,6 @@ class ConvOpVectorization : public OpRewritePattern { LogicalResult matchAndRewrite(ConvOp minOp, PatternRewriter &rewriter) const override; - - // TODO: Make these pass arguments. - static const int tileSize = 3; - static const int noTile = 1; }; //===----------------------------------------------------------------------===// diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir index 97ea95c8bcd1a..7cc0875b33539 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir index dcfcc9b62bbc1..7f90ac675f728 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir index 2e79b46801bca..3eb0959ddda16 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir index e271b0a009b6f..787cbf5d268bb 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir index e27c40524fcca..c6236db6a05a2 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir index b5b4a5c82c095..3213b7dc5fe23 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir index 12ea946966603..8020f3ac017f4 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir index e36abc83b700c..830b5402c2a4c 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir index b302b3e0d8bdf..0b25ea09157cd 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir @@ -9,13 +9,13 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" \ -// RUN: -test-conv-vectorization -convert-linalg-to-llvm | \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 9a225dd81c79c..4430c34af1e9e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -385,16 +385,19 @@ LogicalResult ConvOpVectorization::matchAndRewrite( return failure(); SmallVector mapping; - // Fail to apply when the size of not vectorized dimension is not 1 or - // when the size of vectorized dimension is not dimSize. + SmallVector vectorDims; + // Fail to apply when the size of not vectorized dimension is not 1. for (unsigned i = 0; i < N; i++) { if (!mask[i] && (inShape[i] != 1 || kShape[i] != 1)) return failure(); - if (mask[i] && (inShape[i] != tileSize || kShape[i] != tileSize)) + + if (mask[i] && inShape[i] != kShape[i]) return failure(); - if (mask[i]) + if (mask[i]) { mapping.push_back(getAffineDimExpr(i, context)); + vectorDims.push_back(inShape[i]); + } } Value input = op.getInput(0); @@ -407,8 +410,7 @@ LogicalResult ConvOpVectorization::matchAndRewrite( auto map = AffineMap::get(rank, 0, mapping, context); SmallVector zeros(rank, std_constant_index(0)); - auto vecType = - VectorType::get(SmallVector(numDims, tileSize), elemType); + auto vecType = VectorType::get(vectorDims, elemType); auto inputVec = vector_transfer_read(vecType, input, zeros, map); auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map); @@ -443,6 +445,9 @@ populateVectorizationPatterns(OwningRewritePatternList &tilingPatterns, OwningRewritePatternList &vectorizationPatterns, ArrayRef tileSizes, MLIRContext *context) { + if (tileSizes.size() < N) + return; + constexpr static StringRef kTiledMarker = "TILED"; constexpr static StringRef kPromotedMarker = "PROMOTED"; tilingPatterns.insert>( @@ -457,49 +462,41 @@ populateVectorizationPatterns(OwningRewritePatternList &tilingPatterns, SmallVector mask(N); int offset = tileSizes.size() - N; std::transform(tileSizes.begin() + offset, tileSizes.end(), mask.begin(), - [](int64_t i) -> bool { return i != ConvOpConst::noTile; }); + [](int64_t i) -> bool { return i > 1; }); vectorizationPatterns.insert>(context, mask); } void mlir::linalg::populateConvVectorizationPatterns( - MLIRContext *context, SmallVectorImpl &patterns) { - const int64_t tileSize = ConvOpConst::tileSize; - const int64_t noTile = ConvOpConst::noTile; - auto makeTileSizes = [&](unsigned numNoTile, unsigned numTile) { - SmallVector result(numNoTile, noTile); - result.append(numTile, tileSize); - return result; - }; - + MLIRContext *context, SmallVectorImpl &patterns, + ArrayRef tileSizes) { OwningRewritePatternList tiling, promotion, vectorization; - populateVectorizationPatterns( - tiling, promotion, vectorization, - makeTileSizes(/*numNoTile=*/1, /*numTile*/ 1), context); + populateVectorizationPatterns(tiling, promotion, vectorization, + tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, - makeTileSizes(3, 2), context); + tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, - makeTileSizes(3, 2), context); + tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, - makeTileSizes(2, 2), context); + tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, - makeTileSizes(4, 3), context); + tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, - makeTileSizes(4, 3), context); + tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, - makeTileSizes(3, 3), context); + tileSizes, context); populateVectorizationPatterns( - tiling, promotion, vectorization, makeTileSizes(5, 4), context); + tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns( - tiling, promotion, vectorization, makeTileSizes(5, 4), context); + tiling, promotion, vectorization, tileSizes, context); patterns.push_back(std::move(tiling)); patterns.push_back(std::move(promotion)); diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir index eeb2ca31fd2a9..e1bb7f3caabb3 100644 --- a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir +++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-conv-vectorization --cse | FileCheck %s +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,3" --cse | FileCheck %s // CHECK-DAG: #[[$map0:.*]] = affine_map<(d0)[s0] -> (1, -d0 + s0)> // CHECK-DAG: #[[$map1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp index c90d8058de329..79b6464f3b4cb 100644 --- a/mlir/test/lib/Transforms/TestConvVectorization.cpp +++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp @@ -24,6 +24,13 @@ namespace { /// A pass converting MLIR Linalg ops into Vector ops. class TestConvVectorization : public PassWrapper> { +public: + TestConvVectorization() = default; + TestConvVectorization(const TestConvVectorization &) {} + explicit TestConvVectorization(ArrayRef tileSizesParam) { + tileSizes = tileSizesParam; + } + void runOnOperation() override; void getDependentDialects(DialectRegistry ®istry) const override { @@ -33,6 +40,10 @@ class TestConvVectorization registry.insert(); registry.insert(); } + + ListOption tileSizes{ + *this, "tile-sizes", llvm::cl::desc("Vectorization sizes."), + llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated}; }; } // namespace @@ -47,7 +58,7 @@ void TestConvVectorization::runOnOperation() { target.addLegalOp(); SmallVector stage1Patterns; - linalg::populateConvVectorizationPatterns(context, stage1Patterns); + linalg::populateConvVectorizationPatterns(context, stage1Patterns, tileSizes); OwningRewritePatternList stage2Patterns = linalg::getLinalgTilingCanonicalizationPatterns(context); From 0eab9d5823815c6520697f8d725c402c88e5d050 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 30 Sep 2020 12:39:30 +0100 Subject: [PATCH 144/544] [SCEV] Verify that all mapped SCEV AddRecs refer to valid loops. This check helps to guard against cases where expressions referring to invalidated/deleted loops are not properly invalidated. The additional check is motivated by the reproducer shared for 8fdac7cb7abb and I think in general make sense as a sanity check. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D88166 --- llvm/lib/Analysis/ScalarEvolution.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 756710909ac79..8759f86e031d2 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12005,6 +12005,25 @@ void ScalarEvolution::verify() const { std::abort(); } } + + // Collect all valid loops currently in LoopInfo. + SmallPtrSet ValidLoops; + SmallVector Worklist(LI.begin(), LI.end()); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + if (ValidLoops.contains(L)) + continue; + ValidLoops.insert(L); + Worklist.append(L->begin(), L->end()); + } + // Check for SCEV expressions referencing invalid/deleted loops. + for (auto &KV : ValueExprMap) { + auto *AR = dyn_cast(KV.second); + if (!AR) + continue; + assert(ValidLoops.contains(AR->getLoop()) && + "AddRec references invalid loop"); + } } bool ScalarEvolution::invalidate( From 05290eead3f95e02700890321ccf6719770f91fe Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 12:28:18 +0100 Subject: [PATCH 145/544] InstCombine] collectBitParts - cleanup variable names. NFCI. Fix a number of WShadow warnings (I was used as the instruction and index......) and fix cases to match style. Also, replaced the Bit APInt mask check in AND instructions with a direct APInt[] bit check. --- llvm/lib/Transforms/Utils/Local.cpp | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 4eb458d217e02..463c6f15492bf 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2855,16 +2855,16 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; Result = BitPart(A->Provider, BitWidth); - for (unsigned i = 0; i < A->Provenance.size(); ++i) { - if (A->Provenance[i] != BitPart::Unset && - B->Provenance[i] != BitPart::Unset && - A->Provenance[i] != B->Provenance[i]) + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) { + if (A->Provenance[BitIdx] != BitPart::Unset && + B->Provenance[BitIdx] != BitPart::Unset && + A->Provenance[BitIdx] != B->Provenance[BitIdx]) return Result = None; - if (A->Provenance[i] == BitPart::Unset) - Result->Provenance[i] = B->Provenance[i]; + if (A->Provenance[BitIdx] == BitPart::Unset) + Result->Provenance[BitIdx] = B->Provenance[BitIdx]; else - Result->Provenance[i] = A->Provenance[i]; + Result->Provenance[BitIdx] = A->Provenance[BitIdx]; } return Result; @@ -2901,13 +2901,12 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // unset the appropriate bits. if (I->getOpcode() == Instruction::And && isa(I->getOperand(1))) { - APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); const APInt &AndMask = cast(I->getOperand(1))->getValue(); // Check that the mask allows a multiple of 8 bits for a bswap, for an // early exit. unsigned NumMaskedBits = AndMask.countPopulation(); - if (!MatchBitReversals && NumMaskedBits % 8 != 0) + if (!MatchBitReversals && (NumMaskedBits % 8) != 0) return Result; const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, @@ -2916,10 +2915,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; Result = Res; - for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) // If the AndMask is zero for this bit, clear the bit. - if ((AndMask & Bit) == 0) - Result->Provenance[i] = BitPart::Unset; + if (AndMask[BitIdx] == 0) + Result->Provenance[BitIdx] = BitPart::Unset; return Result; } @@ -2933,10 +2932,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, Result = BitPart(Res->Provider, BitWidth); auto NarrowBitWidth = cast(cast(I)->getSrcTy())->getBitWidth(); - for (unsigned i = 0; i < NarrowBitWidth; ++i) - Result->Provenance[i] = Res->Provenance[i]; - for (unsigned i = NarrowBitWidth; i < BitWidth; ++i) - Result->Provenance[i] = BitPart::Unset; + for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx) + Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; + for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitPart::Unset; return Result; } @@ -2966,11 +2965,12 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) return Result; + unsigned StartBitRHS = BitWidth - ModAmt; Result = BitPart(LHS->Provider, BitWidth); - for (unsigned I = 0; I < (BitWidth - ModAmt); ++I) - Result->Provenance[I + ModAmt] = LHS->Provenance[I]; - for (unsigned I = 0; I < ModAmt; ++I) - Result->Provenance[I] = RHS->Provenance[I + BitWidth - ModAmt]; + for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) + Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; + for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) + Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; return Result; } } @@ -2979,8 +2979,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be // the input value to the bswap/bitreverse. Result = BitPart(V, BitWidth); - for (unsigned i = 0; i < BitWidth; ++i) - Result->Provenance[i] = i; + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitIdx; return Result; } From 413b4998bd722ab671e29e6dff5d458d1869f39b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 13:39:18 +0100 Subject: [PATCH 146/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - use ArrayRef::back() helper. NFCI. Post-commit feedback on D88316 --- llvm/lib/Transforms/Utils/Local.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 463c6f15492bf..a76dc655ddbf6 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3027,7 +3027,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( "Illegal bit provenance index"); // If the upper bits are zero, then attempt to perform as a truncated op. - if (BitProvenance[BitProvenance.size() - 1] == BitPart::Unset) { + if (BitProvenance.back() == BitPart::Unset) { while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) BitProvenance = BitProvenance.drop_back(); if (BitProvenance.empty()) From 3f88c10a6b25668bb99f5eee7867dcbf37df973c Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 12:25:06 +0100 Subject: [PATCH 147/544] [RDA] isSafeToDefRegAt: Look at global uses We weren't looking at global uses of a value, so we could happily overwrite the register incorrectly. Differential Revision: https://reviews.llvm.org/D88554 --- llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 2 +- .../biquad-cascade-optsize-strd-lr.mir | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index e94e547800a49..6ed1d3872270e 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -678,7 +678,7 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, int PhysReg, if (isRegUsedAfter(MI, PhysReg)) { if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) { SmallPtrSet Uses; - getReachingLocalUses(Def, PhysReg, Uses); + getGlobalUses(Def, PhysReg, Uses); for (auto *Use : Uses) if (!Ignore.count(Use)) return false; diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir index a847b69c26143..607cd788930bb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir @@ -226,9 +226,11 @@ body: | ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; CHECK: liveins: $r1, $r2, $r3, $r4, $r5, $r7 ; CHECK: $r9, $r8 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - ; CHECK: dead renamable $lr = nuw t2ADDri renamable $r5, 20, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = nuw t2ADDri renamable $r5, 20, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r6, $r12 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) - ; CHECK: $lr = t2WLS renamable $r3, %bb.5 + ; CHECK: t2CMPri renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.5, 0 /* CC::eq */, killed $cpsr + ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg ; CHECK: bb.2.bb27: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $lr, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 @@ -256,6 +258,7 @@ body: | ; CHECK: renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) ; CHECK: $r8 = tMOVr $r5, 14 /* CC::al */, $noreg @@ -266,7 +269,8 @@ body: | ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r2 = t2STR_POST renamable $r6, killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.3 + ; CHECK: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr + ; CHECK: tB %bb.4, 14 /* CC::al */, $noreg ; CHECK: bb.4.bb72: ; CHECK: successors: %bb.5(0x80000000) ; CHECK: liveins: $r5, $r6, $r7, $r9 From 621c6c89627972d52796e64a9476a7d05f22f2cd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 14:11:43 +0100 Subject: [PATCH 148/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - cleanup bswap/bitreverse detection loop. NFCI. Early out if both pattern matches have failed (or we don't want them). Fix case of bit index iterator (and avoid Wshadow issue). --- llvm/lib/Transforms/Utils/Local.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index a76dc655ddbf6..149755bb88b86 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3038,18 +3038,20 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. unsigned DemandedBW = DemandedTy->getBitWidth(); - bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; - for (unsigned i = 0; i < DemandedBW; ++i) { - OKForBSwap &= - bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW); - OKForBitReverse &= - bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW); + bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; + bool OKForBitReverse = MatchBitReversals; + for (unsigned BitIdx = 0; + (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, + DemandedBW); + OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], + BitIdx, DemandedBW); } Intrinsic::ID Intrin; - if (OKForBSwap && MatchBSwaps) + if (OKForBSwap) Intrin = Intrinsic::bswap; - else if (OKForBitReverse && MatchBitReversals) + else if (OKForBitReverse) Intrin = Intrinsic::bitreverse; else return false; From 08c5720405d5204ec2329b7f6c561062c7dddee2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 14:19:00 +0100 Subject: [PATCH 149/544] [InstCombine] Add PR47191 bswap tests --- llvm/test/Transforms/InstCombine/bswap.ll | 198 ++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 5f9a8078f5415..d04262b8e0558 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -452,3 +452,201 @@ define i32 @funnel_binary(i32 %abcd) { %dcba = or i32 %dczz, %zzba ret i32 %dcba } + +; PR47191 - deep IR trees prevent ADD/XOR instructions being simplified to OR. + +define i64 @PR47191_problem1(i64 %0) { +; CHECK-LABEL: @PR47191_problem1( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 +; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] +; CHECK-NEXT: ret i64 [[TMP22]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = lshr i64 %0, 24 + %6 = and i64 %5, 16711680 + %7 = lshr i64 %0, 8 + %8 = and i64 %7, 4278190080 + %9 = shl i64 %0, 56 + %10 = shl i64 %0, 40 + %11 = and i64 %10, 71776119061217280 + %12 = shl i64 %0, 24 + %13 = and i64 %12, 280375465082880 + %14 = or i64 %9, %2 + %15 = or i64 %14, %4 + %16 = or i64 %15, %6 + %17 = or i64 %16, %8 + %18 = or i64 %17, %11 + %19 = or i64 %18, %13 + %20 = shl i64 %0, 8 + %21 = and i64 %20, 1095216660480 + %22 = add i64 %19, %21 + ret i64 %22 +} + +define i64 @PR47191_problem2(i64 %0) { +; CHECK-LABEL: @PR47191_problem2( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] +; CHECK-NEXT: ret i64 [[TMP22]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = lshr i64 %0, 24 + %6 = and i64 %5, 16711680 + %7 = lshr i64 %0, 8 + %8 = and i64 %7, 4278190080 + %9 = shl i64 %0, 56 + %10 = shl i64 %0, 40 + %11 = and i64 %10, 71776119061217280 + %12 = or i64 %9, %2 + %13 = or i64 %12, %4 + %14 = or i64 %13, %6 + %15 = or i64 %14, %8 + %16 = or i64 %15, %11 + %17 = shl i64 %0, 24 + %18 = and i64 %17, 280375465082880 + %19 = shl i64 %0, 8 + %20 = and i64 %19, 1095216660480 + %21 = or i64 %20, %18 + %22 = xor i64 %21, %16 + ret i64 %22 +} + +define i64 @PR47191_problem3(i64 %0) { +; CHECK-LABEL: @PR47191_problem3( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] +; CHECK-NEXT: ret i64 [[TMP22]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = lshr i64 %0, 24 + %6 = and i64 %5, 16711680 + %7 = lshr i64 %0, 8 + %8 = and i64 %7, 4278190080 + %9 = shl i64 %0, 56 + %10 = shl i64 %0, 40 + %11 = and i64 %10, 71776119061217280 + %12 = or i64 %9, %2 + %13 = or i64 %12, %4 + %14 = or i64 %13, %6 + %15 = or i64 %14, %8 + %16 = or i64 %15, %11 + %17 = shl i64 %0, 24 + %18 = and i64 %17, 280375465082880 + %19 = shl i64 %0, 8 + %20 = and i64 %19, 1095216660480 + %21 = or i64 %20, %18 + %22 = xor i64 %21, %16 + ret i64 %22 +} + +define i64 @PR47191_problem4(i64 %0) { +; CHECK-LABEL: @PR47191_problem4( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] +; CHECK-NEXT: ret i64 [[TMP22]] +; + %2 = lshr i64 %0, 56 + %3 = shl i64 %0, 56 + %4 = or i64 %2, %3 + %5 = lshr i64 %0, 40 + %6 = and i64 %5, 65280 + %7 = or i64 %4, %6 + %8 = shl i64 %0, 40 + %9 = and i64 %8, 71776119061217280 + %10 = or i64 %7, %9 + %11 = lshr i64 %0, 24 + %12 = and i64 %11, 16711680 + %13 = or i64 %10, %12 + %14 = shl i64 %0, 24 + %15 = and i64 %14, 280375465082880 + %16 = or i64 %13, %15 + %17 = lshr i64 %0, 8 + %18 = and i64 %17, 4278190080 + %19 = or i64 %16, %18 + %20 = shl i64 %0, 8 + %21 = and i64 %20, 1095216660480 + %22 = add i64 %19, %21 + ret i64 %22 +} From f794160c6cb7da4b5ef354a91fe498341f651d36 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Wed, 30 Sep 2020 08:27:09 -0400 Subject: [PATCH 150/544] [lldb] Fix FreeBSD Arm Process Plugin build Add a missing include and some definitions in 769533216666. Patch by: Brooks Davis Reviewed by: labath Differential Revision: https://reviews.llvm.org/D88453 --- .../FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp | 1 + .../Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp index 2f4d613f767af..afb92e8484668 100644 --- a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp @@ -14,6 +14,7 @@ #include "ProcessMonitor.h" #include "RegisterContextPOSIXProcessMonitor_arm.h" #include "Plugins/Process/Utility/RegisterContextPOSIX_arm.h" +#include "Plugins/Process/Utility/lldb-arm-register-enums.h" using namespace lldb_private; using namespace lldb; diff --git a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h index 12e1f19d32fac..906926fd9194a 100644 --- a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h +++ b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h @@ -70,6 +70,10 @@ class RegisterContextPOSIXProcessMonitor_arm : public RegisterContextPOSIX_arm, uint32_t NumSupportedHardwareWatchpoints(); private: + RegisterInfoPOSIX_arm::GPR m_gpr_arm; + + RegisterInfoPOSIX_arm::FPU m_fpr; + ProcessMonitor &GetMonitor(); }; From d8563654701c79fb9ab28ecf94567d9934baed05 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 22 Sep 2020 15:10:06 +0100 Subject: [PATCH 151/544] [VPlan] Change recipes to inherit from VPUser instead of a member var. Now that VPUser is not inheriting from VPValue, we can take the next step and turn the recipes that already manage their operands via VPUser into VPUsers directly. This is another small step towards traversing def-use chains in VPlan. This is NFC with respect to the generated code, but makes the interface more powerful. --- .../Transforms/Vectorize/LoopVectorize.cpp | 12 +- llvm/lib/Transforms/Vectorize/VPlan.h | 112 +++++++------ llvm/lib/Transforms/Vectorize/VPlanValue.h | 4 + .../Transforms/Vectorize/VPlanTest.cpp | 147 ++++++++++++++++++ 4 files changed, 209 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c5026c7558b74..73ac508c389a6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7946,19 +7946,19 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, } void VPWidenCallRecipe::execute(VPTransformState &State) { - State.ILV->widenCallInstruction(Ingredient, User, State); + State.ILV->widenCallInstruction(Ingredient, *this, State); } void VPWidenSelectRecipe::execute(VPTransformState &State) { - State.ILV->widenSelectInstruction(Ingredient, User, InvariantCond, State); + State.ILV->widenSelectInstruction(Ingredient, *this, InvariantCond, State); } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(Ingredient, User, State); + State.ILV->widenInstruction(Ingredient, *this, State); } void VPWidenGEPRecipe::execute(VPTransformState &State) { - State.ILV->widenGEP(GEP, User, State.UF, State.VF, IsPtrLoopInvariant, + State.ILV->widenGEP(GEP, *this, State.UF, State.VF, IsPtrLoopInvariant, IsIndexLoopInvariant, State); } @@ -8039,7 +8039,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. - State.ILV->scalarizeInstruction(Ingredient, User, *State.Instance, + State.ILV->scalarizeInstruction(Ingredient, *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { @@ -8061,7 +8061,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane}, + State.ILV->scalarizeInstruction(Ingredient, *this, {Part, Lane}, IsPredicated, State); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9d1368e6c3204..416a79eacfa79 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -678,6 +678,18 @@ class VPRecipeBase : public ilist_node_with_parent { iplist::iterator eraseFromParent(); }; +inline bool VPUser::classof(const VPRecipeBase *Recipe) { + return Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPWidenCallSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPBlendSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPInterleaveSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPReplicateSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; +} + /// This is a concrete Recipe that models a single VPlan-level instruction. /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as @@ -780,17 +792,14 @@ class VPInstruction : public VPUser, public VPValue, public VPRecipeBase { /// VPWidenRecipe is a recipe for producing a copy of vector type its /// ingredient. This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. -class VPWidenRecipe : public VPRecipeBase { +class VPWidenRecipe : public VPRecipeBase, public VPUser { /// Hold the instruction to be widened. Instruction &Ingredient; - /// Hold VPValues for the operands of the ingredient. - VPUser User; - public: template VPWidenRecipe(Instruction &I, iterator_range Operands) - : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {} + : VPRecipeBase(VPWidenSC), VPUser(Operands), Ingredient(I) {} ~VPWidenRecipe() override = default; @@ -808,17 +817,14 @@ class VPWidenRecipe : public VPRecipeBase { }; /// A recipe for widening Call instructions. -class VPWidenCallRecipe : public VPRecipeBase { +class VPWidenCallRecipe : public VPRecipeBase, public VPUser { /// Hold the call to be widened. CallInst &Ingredient; - /// Hold VPValues for the arguments of the call. - VPUser User; - public: template VPWidenCallRecipe(CallInst &I, iterator_range CallArguments) - : VPRecipeBase(VPWidenCallSC), Ingredient(I), User(CallArguments) {} + : VPRecipeBase(VPWidenCallSC), VPUser(CallArguments), Ingredient(I) {} ~VPWidenCallRecipe() override = default; @@ -836,14 +842,11 @@ class VPWidenCallRecipe : public VPRecipeBase { }; /// A recipe for widening select instructions. -class VPWidenSelectRecipe : public VPRecipeBase { +class VPWidenSelectRecipe : public VPRecipeBase, public VPUser { private: /// Hold the select to be widened. SelectInst &Ingredient; - /// Hold VPValues for the operands of the select. - VPUser User; - /// Is the condition of the select loop invariant? bool InvariantCond; @@ -851,7 +854,7 @@ class VPWidenSelectRecipe : public VPRecipeBase { template VPWidenSelectRecipe(SelectInst &I, iterator_range Operands, bool InvariantCond) - : VPRecipeBase(VPWidenSelectSC), Ingredient(I), User(Operands), + : VPRecipeBase(VPWidenSelectSC), VPUser(Operands), Ingredient(I), InvariantCond(InvariantCond) {} ~VPWidenSelectRecipe() override = default; @@ -870,20 +873,22 @@ class VPWidenSelectRecipe : public VPRecipeBase { }; /// A recipe for handling GEP instructions. -class VPWidenGEPRecipe : public VPRecipeBase { +class VPWidenGEPRecipe : public VPRecipeBase, public VPUser { GetElementPtrInst *GEP; - /// Hold VPValues for the base and indices of the GEP. - VPUser User; - bool IsPtrLoopInvariant; SmallBitVector IsIndexLoopInvariant; public: + template + VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range Operands) + : VPRecipeBase(VPWidenGEPSC), VPUser(Operands), GEP(GEP), + IsIndexLoopInvariant(GEP->getNumIndices(), false) {} + template VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range Operands, Loop *OrigLoop) - : VPRecipeBase(VPWidenGEPSC), GEP(GEP), User(Operands), + : VPRecipeBase(VPWidenGEPSC), VPUser(Operands), GEP(GEP), IsIndexLoopInvariant(GEP->getNumIndices(), false) { IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand()); for (auto Index : enumerate(GEP->indices())) @@ -953,17 +958,15 @@ class VPWidenPHIRecipe : public VPRecipeBase { /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase { +class VPBlendRecipe : public VPRecipeBase, public VPUser { PHINode *Phi; +public: /// The blend operation is a User of the incoming values and of their /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value /// might be incoming with a full mask for which there is no VPValue. - VPUser User; - -public: VPBlendRecipe(PHINode *Phi, ArrayRef Operands) - : VPRecipeBase(VPBlendSC), Phi(Phi), User(Operands) { + : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -977,17 +980,13 @@ class VPBlendRecipe : public VPRecipeBase { /// Return the number of incoming values, taking into account that a single /// incoming value has no mask. - unsigned getNumIncomingValues() const { - return (User.getNumOperands() + 1) / 2; - } + unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; } /// Return incoming value number \p Idx. - VPValue *getIncomingValue(unsigned Idx) const { - return User.getOperand(Idx * 2); - } + VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); } /// Return mask number \p Idx. - VPValue *getMask(unsigned Idx) const { return User.getOperand(Idx * 2 + 1); } + VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); } /// Generate the phi/select nodes. void execute(VPTransformState &State) override; @@ -999,16 +998,15 @@ class VPBlendRecipe : public VPRecipeBase { /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase { +class VPInterleaveRecipe : public VPRecipeBase, public VPUser { const InterleaveGroup *IG; - VPUser User; public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) { + : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) { if (Mask) - User.addOperand(Mask); + addOperand(Mask); } ~VPInterleaveRecipe() override = default; @@ -1019,14 +1017,14 @@ class VPInterleaveRecipe : public VPRecipeBase { /// Return the address accessed by this recipe. VPValue *getAddr() const { - return User.getOperand(0); // Address is the 1st, mandatory operand. + return getOperand(0); // Address is the 1st, mandatory operand. } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { // Mask is optional and therefore the last, currently 2nd operand. - return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr; + return getNumOperands() == 2 ? getOperand(1) : nullptr; } /// Generate the wide load or store, and shuffles. @@ -1080,13 +1078,10 @@ class VPReductionRecipe : public VPRecipeBase { /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. -class VPReplicateRecipe : public VPRecipeBase { +class VPReplicateRecipe : public VPRecipeBase, public VPUser { /// The instruction being replicated. Instruction *Ingredient; - /// Hold VPValues for the operands of the ingredient. - VPUser User; - /// Indicator if only a single replica per lane is needed. bool IsUniform; @@ -1100,7 +1095,7 @@ class VPReplicateRecipe : public VPRecipeBase { template VPReplicateRecipe(Instruction *I, iterator_range Operands, bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC), Ingredient(I), User(Operands), + : VPRecipeBase(VPReplicateSC), VPUser(Operands), Ingredient(I), IsUniform(IsUniform), IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the @@ -1130,13 +1125,11 @@ class VPReplicateRecipe : public VPRecipeBase { }; /// A recipe for generating conditional branches on the bits of a mask. -class VPBranchOnMaskRecipe : public VPRecipeBase { - VPUser User; - +class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser { public: VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) { if (BlockInMask) // nullptr means all-one mask. - User.addOperand(BlockInMask); + addOperand(BlockInMask); } /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -1162,9 +1155,9 @@ class VPBranchOnMaskRecipe : public VPRecipeBase { /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { - assert(User.getNumOperands() <= 1 && "should have either 0 or 1 operands"); + assert(getNumOperands() <= 1 && "should have either 0 or 1 operands"); // Mask is optional. - return User.getNumOperands() == 1 ? User.getOperand(0) : nullptr; + return getNumOperands() == 1 ? getOperand(0) : nullptr; } }; @@ -1202,31 +1195,30 @@ class VPPredInstPHIRecipe : public VPRecipeBase { /// - For store: Address, stored value, optional mask /// TODO: We currently execute only per-part unless a specific instance is /// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase { +class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPUser { Instruction &Instr; - VPUser User; void setMask(VPValue *Mask) { if (!Mask) return; - User.addOperand(Mask); + addOperand(Mask); } bool isMasked() const { - return (isa(Instr) && User.getNumOperands() == 2) || - (isa(Instr) && User.getNumOperands() == 3); + return (isa(Instr) && getNumOperands() == 2) || + (isa(Instr) && getNumOperands() == 3); } public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Load), User({Addr}) { + : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr}), Instr(Load) { setMask(Mask); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Store), - User({Addr, StoredValue}) { + : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr, StoredValue}), + Instr(Store) { setMask(Mask); } @@ -1237,21 +1229,21 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { /// Return the address accessed by this recipe. VPValue *getAddr() const { - return User.getOperand(0); // Address is the 1st, mandatory operand. + return getOperand(0); // Address is the 1st, mandatory operand. } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { // Mask is optional and therefore the last operand. - return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr; + return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; } /// Return the address accessed by this recipe. VPValue *getStoredValue() const { assert(isa(Instr) && "Stored value only available for store instructions"); - return User.getOperand(1); // Stored value is the 2nd, mandatory operand. + return getOperand(1); // Stored value is the 2nd, mandatory operand. } /// Generate the wide load/store. diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 3274b6cf97902..50cf1285dd4b3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -31,6 +31,7 @@ class raw_ostream; class Value; class VPSlotTracker; class VPUser; +class VPRecipeBase; // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins @@ -178,6 +179,9 @@ class VPUser { const_operand_range operands() const { return const_operand_range(op_begin(), op_end()); } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *Recipe); }; class VPlan; class VPBasicBlock; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 73e01fade9a97..46d9899cd054c 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -247,5 +247,152 @@ compound=true } } +TEST(VPRecipeTest, CastVPWidenRecipeToVPUser) { + LLVMContext C; + + IntegerType *Int32 = IntegerType::get(C, 32); + auto *AI = + BinaryOperator::CreateAdd(UndefValue::get(Int32), UndefValue::get(Int32)); + VPValue Op1; + VPValue Op2; + SmallVector Args; + Args.push_back(&Op1); + Args.push_back(&Op1); + VPWidenRecipe WidenR(*AI, make_range(Args.begin(), Args.end())); + EXPECT_TRUE(isa(&WidenR)); + VPRecipeBase *WidenRBase = &WidenR; + EXPECT_TRUE(isa(WidenRBase)); + delete AI; +} + +TEST(VPRecipeTest, CastVPWidenCallRecipeToVPUser) { + LLVMContext C; + + IntegerType *Int32 = IntegerType::get(C, 32); + FunctionType *FTy = FunctionType::get(Int32, false); + auto *Call = CallInst::Create(FTy, UndefValue::get(FTy)); + VPValue Op1; + VPValue Op2; + SmallVector Args; + Args.push_back(&Op1); + Args.push_back(&Op2); + VPWidenCallRecipe Recipe(*Call, make_range(Args.begin(), Args.end())); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); + delete Call; +} + +TEST(VPRecipeTest, CastVPWidenSelectRecipeToVPUser) { + LLVMContext C; + + IntegerType *Int1 = IntegerType::get(C, 1); + IntegerType *Int32 = IntegerType::get(C, 32); + auto *SelectI = SelectInst::Create( + UndefValue::get(Int1), UndefValue::get(Int32), UndefValue::get(Int32)); + VPValue Op1; + VPValue Op2; + VPValue Op3; + SmallVector Args; + Args.push_back(&Op1); + Args.push_back(&Op2); + Args.push_back(&Op3); + VPWidenSelectRecipe WidenSelectR(*SelectI, + make_range(Args.begin(), Args.end()), false); + EXPECT_TRUE(isa(&WidenSelectR)); + VPRecipeBase *BaseR = &WidenSelectR; + EXPECT_TRUE(isa(BaseR)); + delete SelectI; +} + +TEST(VPRecipeTest, CastVPWidenGEPRecipeToVPUser) { + LLVMContext C; + + IntegerType *Int32 = IntegerType::get(C, 32); + PointerType *Int32Ptr = PointerType::get(Int32, 0); + auto *GEP = GetElementPtrInst::Create(Int32, UndefValue::get(Int32Ptr), + UndefValue::get(Int32)); + VPValue Op1; + VPValue Op2; + SmallVector Args; + Args.push_back(&Op1); + Args.push_back(&Op2); + VPWidenGEPRecipe Recipe(GEP, make_range(Args.begin(), Args.end())); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); + delete GEP; +} + +TEST(VPRecipeTest, CastVPBlendRecipeToVPUser) { + LLVMContext C; + + IntegerType *Int32 = IntegerType::get(C, 32); + auto *Phi = PHINode::Create(Int32, 1); + VPValue Op1; + VPValue Op2; + SmallVector Args; + Args.push_back(&Op1); + Args.push_back(&Op2); + VPBlendRecipe Recipe(Phi, Args); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); + delete Phi; +} + +TEST(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { + LLVMContext C; + + VPValue Addr; + VPValue Mask; + VPInterleaveRecipe Recipe(nullptr, &Addr, &Mask); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); +} + +TEST(VPRecipeTest, CastVPReplicateRecipeToVPUser) { + LLVMContext C; + + VPValue Op1; + VPValue Op2; + SmallVector Args; + Args.push_back(&Op1); + Args.push_back(&Op2); + + VPReplicateRecipe Recipe(nullptr, make_range(Args.begin(), Args.end()), true, + false); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); +} + +TEST(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) { + LLVMContext C; + + VPValue Mask; + VPBranchOnMaskRecipe Recipe(&Mask); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); +} + +TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUser) { + LLVMContext C; + + IntegerType *Int32 = IntegerType::get(C, 32); + PointerType *Int32Ptr = PointerType::get(Int32, 0); + auto *Load = + new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); + VPValue Addr; + VPValue Mask; + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); + delete Load; +} + } // namespace } // namespace llvm From 762e8f9bbdaf43300dbc75637a8bce1ce643cc06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 30 Sep 2020 14:53:05 +0200 Subject: [PATCH 152/544] [lldb] [Process/NetBSD] Fix operating on ftag register --- .../Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp b/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp index ca4706a656571..af8b2a2ba794f 100644 --- a/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp +++ b/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp @@ -324,7 +324,7 @@ static constexpr int RegNumX86ToX86_64(int regnum) { case lldb_fstat_i386: return lldb_fstat_x86_64; case lldb_ftag_i386: - return lldb_fstat_x86_64; + return lldb_ftag_x86_64; case lldb_fop_i386: return lldb_fop_x86_64; case lldb_fiseg_i386: @@ -651,7 +651,7 @@ NativeRegisterContextNetBSD_x86_64::ReadRegister(const RegisterInfo *reg_info, reg_value = (uint16_t)m_fpr.fxstate.fx_sw; break; case lldb_ftag_x86_64: - reg_value = (uint8_t)m_fpr.fxstate.fx_tw; + reg_value = (uint16_t)m_fpr.fxstate.fx_tw; break; case lldb_fop_x86_64: reg_value = (uint64_t)m_fpr.fxstate.fx_opcode; @@ -939,7 +939,7 @@ Status NativeRegisterContextNetBSD_x86_64::WriteRegister( m_fpr.fxstate.fx_sw = reg_value.GetAsUInt16(); break; case lldb_ftag_x86_64: - m_fpr.fxstate.fx_tw = reg_value.GetAsUInt8(); + m_fpr.fxstate.fx_tw = reg_value.GetAsUInt16(); break; case lldb_fop_x86_64: m_fpr.fxstate.fx_opcode = reg_value.GetAsUInt16(); From d5545a8993489ee426b757482a64c9373cf7cf38 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 14:36:12 +0100 Subject: [PATCH 153/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - remove unnecessary cast. NFCI. --- llvm/lib/Transforms/Utils/Local.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 149755bb88b86..f8e4d34cbf4e3 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3059,9 +3059,8 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( if (ITy != DemandedTy) { Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); Value *Provider = Res->Provider; - IntegerType *ProviderTy = cast(Provider->getType()); // We may need to truncate the provider. - if (DemandedTy != ProviderTy) { + if (DemandedTy != Provider->getType()) { auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy, "trunc", I); InsertedInsts.push_back(Trunc); From 7fcad5583a12026ce19afe487681753ac633064a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 14:39:23 +0100 Subject: [PATCH 154/544] [InstCombine] Remove %tmp variable names from bswap tests Appease update_test_checks script that was complaining about potential %TMP clashes --- llvm/test/Transforms/InstCombine/bswap.ll | 122 +++++++++++----------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index d04262b8e0558..965f149b6d95e 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -5,95 +5,95 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 define i32 @test1(i32 %i) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[I:%.*]]) -; CHECK-NEXT: ret i32 [[TMP12]] +; CHECK-NEXT: [[T12:%.*]] = call i32 @llvm.bswap.i32(i32 [[I:%.*]]) +; CHECK-NEXT: ret i32 [[T12]] ; - %tmp1 = lshr i32 %i, 24 - %tmp3 = lshr i32 %i, 8 - %tmp4 = and i32 %tmp3, 65280 - %tmp5 = or i32 %tmp1, %tmp4 - %tmp7 = shl i32 %i, 8 - %tmp8 = and i32 %tmp7, 16711680 - %tmp9 = or i32 %tmp5, %tmp8 - %tmp11 = shl i32 %i, 24 - %tmp12 = or i32 %tmp9, %tmp11 - ret i32 %tmp12 + %t1 = lshr i32 %i, 24 + %t3 = lshr i32 %i, 8 + %t4 = and i32 %t3, 65280 + %t5 = or i32 %t1, %t4 + %t7 = shl i32 %i, 8 + %t8 = and i32 %t7, 16711680 + %t9 = or i32 %t5, %t8 + %t11 = shl i32 %i, 24 + %t12 = or i32 %t9, %t11 + ret i32 %t12 } define i32 @test2(i32 %arg) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.bswap.i32(i32 [[ARG:%.*]]) -; CHECK-NEXT: ret i32 [[TMP14]] +; CHECK-NEXT: [[T14:%.*]] = call i32 @llvm.bswap.i32(i32 [[ARG:%.*]]) +; CHECK-NEXT: ret i32 [[T14]] ; - %tmp2 = shl i32 %arg, 24 - %tmp4 = shl i32 %arg, 8 - %tmp5 = and i32 %tmp4, 16711680 - %tmp6 = or i32 %tmp2, %tmp5 - %tmp8 = lshr i32 %arg, 8 - %tmp9 = and i32 %tmp8, 65280 - %tmp10 = or i32 %tmp6, %tmp9 - %tmp12 = lshr i32 %arg, 24 - %tmp14 = or i32 %tmp10, %tmp12 - ret i32 %tmp14 + %t2 = shl i32 %arg, 24 + %t4 = shl i32 %arg, 8 + %t5 = and i32 %t4, 16711680 + %t6 = or i32 %t2, %t5 + %t8 = lshr i32 %arg, 8 + %t9 = and i32 %t8, 65280 + %t10 = or i32 %t6, %t9 + %t12 = lshr i32 %arg, 24 + %t14 = or i32 %t10, %t12 + ret i32 %t14 } define i16 @test3(i16 %s) { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) -; CHECK-NEXT: ret i16 [[TMP5]] +; CHECK-NEXT: [[T5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) +; CHECK-NEXT: ret i16 [[T5]] ; - %tmp2 = lshr i16 %s, 8 - %tmp4 = shl i16 %s, 8 - %tmp5 = or i16 %tmp2, %tmp4 - ret i16 %tmp5 + %t2 = lshr i16 %s, 8 + %t4 = shl i16 %s, 8 + %t5 = or i16 %t2, %t4 + ret i16 %t5 } define i16 @test4(i16 %s) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) -; CHECK-NEXT: ret i16 [[TMP5]] +; CHECK-NEXT: [[T5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) +; CHECK-NEXT: ret i16 [[T5]] ; - %tmp2 = lshr i16 %s, 8 - %tmp4 = shl i16 %s, 8 - %tmp5 = or i16 %tmp4, %tmp2 - ret i16 %tmp5 + %t2 = lshr i16 %s, 8 + %t4 = shl i16 %s, 8 + %t5 = or i16 %t4, %t2 + ret i16 %t5 } define i16 @test5(i16 %a) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[TMP_UPGRD_3:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) -; CHECK-NEXT: ret i16 [[TMP_UPGRD_3]] -; - %tmp = zext i16 %a to i32 - %tmp1 = and i32 %tmp, 65280 - %tmp2 = ashr i32 %tmp1, 8 - %tmp2.upgrd.1 = trunc i32 %tmp2 to i16 - %tmp4 = and i32 %tmp, 255 - %tmp5 = shl i32 %tmp4, 8 - %tmp5.upgrd.2 = trunc i32 %tmp5 to i16 - %tmp.upgrd.3 = or i16 %tmp2.upgrd.1, %tmp5.upgrd.2 - %tmp6 = bitcast i16 %tmp.upgrd.3 to i16 - %tmp6.upgrd.4 = zext i16 %tmp6 to i32 - %retval = trunc i32 %tmp6.upgrd.4 to i16 +; CHECK-NEXT: [[T_UPGRD_3:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[T_UPGRD_3]] +; + %t = zext i16 %a to i32 + %t1 = and i32 %t, 65280 + %t2 = ashr i32 %t1, 8 + %t2.upgrd.1 = trunc i32 %t2 to i16 + %t4 = and i32 %t, 255 + %t5 = shl i32 %t4, 8 + %t5.upgrd.2 = trunc i32 %t5 to i16 + %t.upgrd.3 = or i16 %t2.upgrd.1, %t5.upgrd.2 + %t6 = bitcast i16 %t.upgrd.3 to i16 + %t6.upgrd.4 = zext i16 %t6 to i32 + %retval = trunc i32 %t6.upgrd.4 to i16 ret i16 %retval } ; PR2842 define i32 @test6(i32 %x) nounwind readnone { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: [[T7:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[T7]] ; - %tmp = shl i32 %x, 16 + %t = shl i32 %x, 16 %x.mask = and i32 %x, 65280 - %tmp1 = lshr i32 %x, 16 - %tmp2 = and i32 %tmp1, 255 - %tmp3 = or i32 %x.mask, %tmp - %tmp4 = or i32 %tmp3, %tmp2 - %tmp5 = shl i32 %tmp4, 8 - %tmp6 = lshr i32 %x, 24 - %tmp7 = or i32 %tmp5, %tmp6 - ret i32 %tmp7 + %t1 = lshr i32 %x, 16 + %t2 = and i32 %t1, 255 + %t3 = or i32 %x.mask, %t + %t4 = or i32 %t3, %t2 + %t5 = shl i32 %t4, 8 + %t6 = lshr i32 %x, 24 + %t7 = or i32 %t5, %t6 + ret i32 %t7 } declare void @extra_use(i32) From c722b3259690d3aad20f31d0ffe6c12b1416bccc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 14:54:04 +0100 Subject: [PATCH 155/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - merge the regular/trunc+zext paths. NFCI. There doesn't seem to be any good reason for having a separate path for when we bswap/bitreverse at a smaller size than the destination size - so merge these to make the instruction generation a lot clearer. --- llvm/lib/Transforms/Utils/Local.cpp | 31 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index f8e4d34cbf4e3..0dacb266a063d 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3056,25 +3056,26 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( else return false; - if (ITy != DemandedTy) { - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); - Value *Provider = Res->Provider; - // We may need to truncate the provider. - if (DemandedTy != Provider->getType()) { - auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy, - "trunc", I); - InsertedInsts.push_back(Trunc); - Provider = Trunc; - } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Value *Provider = Res->Provider; + + // We may need to truncate the provider. + if (DemandedTy != Provider->getType()) { + auto *Trunc = + CastInst::Create(Instruction::Trunc, Provider, DemandedTy, "trunc", I); + InsertedInsts.push_back(Trunc); + Provider = Trunc; + } + + auto *CI = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(CI); + + // We may need to zeroextend back to the result type. + if (ITy != CI->getType()) { auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); InsertedInsts.push_back(ExtInst); - return true; } - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); - InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); return true; } From 216af81c39d1cc4e90af7b991d517c4c7acc912e Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 30 Sep 2020 15:45:13 +0200 Subject: [PATCH 156/544] [clangd] Fix invalid UTF8 when extracting doc comments. Differential Revision: https://reviews.llvm.org/D88567 --- clang-tools-extra/clangd/CodeCompletionStrings.cpp | 8 +++++++- .../clangd/unittests/CodeCompletionStringsTests.cpp | 9 +++++++++ .../clangd/unittests/SymbolCollectorTests.cpp | 4 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.cpp b/clang-tools-extra/clangd/CodeCompletionStrings.cpp index ef44c153425aa..d4a3bdafcae05 100644 --- a/clang-tools-extra/clangd/CodeCompletionStrings.cpp +++ b/clang-tools-extra/clangd/CodeCompletionStrings.cpp @@ -12,6 +12,7 @@ #include "clang/AST/RawCommentList.h" #include "clang/Basic/SourceManager.h" #include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/Support/JSON.h" #include #include @@ -86,7 +87,12 @@ std::string getDeclComment(const ASTContext &Ctx, const NamedDecl &Decl) { assert(!Ctx.getSourceManager().isLoadedSourceLocation(RC->getBeginLoc())); std::string Doc = RC->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics()); - return looksLikeDocComment(Doc) ? Doc : ""; + if (!looksLikeDocComment(Doc)) + return ""; + // Clang requires source to be UTF-8, but doesn't enforce this in comments. + if (!llvm::json::isUTF8(Doc)) + Doc = llvm::json::fixUTF8(Doc); + return Doc; } void getSignature(const CodeCompletionString &CCS, std::string *Signature, diff --git a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp index 2531922a5ca17..7aace938b70cb 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "CodeCompletionStrings.h" +#include "TestTU.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -56,6 +57,14 @@ TEST_F(CompletionStringTest, DocumentationWithAnnotation) { "Annotation: Ano\n\nIs this brief?"); } +TEST_F(CompletionStringTest, GetDeclCommentBadUTF8) { + // is not a valid byte here, should be replaced by encoded . + auto TU = TestTU::withCode("/*x\xffy*/ struct X;"); + auto AST = TU.build(); + EXPECT_EQ("x\xef\xbf\xbdy", + getDeclComment(AST.getASTContext(), findDecl(AST, "X"))); +} + TEST_F(CompletionStringTest, MultipleAnnotations) { Builder.AddAnnotation("Ano1"); Builder.AddAnnotation("Ano2"); diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 3940946d8016a..80995baf946f8 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1606,11 +1606,11 @@ TEST_F(SymbolCollectorTest, BadUTF8) { // Extracted from boost/spirit/home/support/char_encoding/iso8859_1.hpp // This looks like UTF-8 and fools clang, but has high-ISO-8859-1 comments. const char *Header = "int PUNCT = 0;\n" - "int types[] = { /* \xa1 */PUNCT };"; + "/* \xa1 */ int types[] = { /* \xa1 */PUNCT };"; CollectorOpts.RefFilter = RefKind::All; CollectorOpts.RefsInHeaders = true; runSymbolCollector(Header, ""); - EXPECT_THAT(Symbols, Contains(QName("types"))); + EXPECT_THAT(Symbols, Contains(AllOf(QName("types"), Doc("\xef\xbf\xbd ")))); EXPECT_THAT(Symbols, Contains(QName("PUNCT"))); // Reference is stored, although offset within line is not reliable. EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "PUNCT").ID, _))); From dfb717da1f794c235b81a985a57dc238c82318e6 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 30 Sep 2020 09:56:55 -0400 Subject: [PATCH 157/544] [PowerPC] Remove support for VRSAVE save/restore/update. After removal of Darwin as a PowerPC subtarget, the VRSAVE save/restore/spill/update code is no longer needed by any supported subtarget, so remove it while keeping support for vrsave and related instruction aliases for inline asm. I've pre-commited tests to document the existing vrsave handling in relation to @llvm.eh.unwind.init and inline asm usage, as well as a test which shows a beahviour change on AIX related to returning vector type as we were wrongly emiting VRSAVE_UPDATE on AIX. --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 192 +----------------- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 69 ------- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 + llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8 - llvm/lib/Target/PowerPC/PPCInstrInfo.h | 9 +- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 17 +- .../Target/PowerPC/PPCMachineFunctionInfo.h | 6 - llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 59 ------ llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 4 - llvm/lib/Target/PowerPC/README_ALTIVEC.txt | 5 - .../test/CodeGen/PowerPC/aix-vector-return.ll | 2 +- 11 files changed, 14 insertions(+), 361 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 83ac946204b33..340a4f867ced1 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -262,153 +262,11 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( return AIXOffsets32; } -/// RemoveVRSaveCode - We have found that this function does not need any code -/// to manipulate the VRSAVE register, even though it uses vector registers. -/// This can happen when the only registers used are known to be live in or out -/// of the function. Remove all of the VRSAVE related code from the function. -/// FIXME: The removal of the code results in a compile failure at -O0 when the -/// function contains a function call, as the GPR containing original VRSAVE -/// contents is spilled and reloaded around the call. Without the prolog code, -/// the spill instruction refers to an undefined register. This code needs -/// to account for all uses of that GPR. -static void RemoveVRSaveCode(MachineInstr &MI) { - MachineBasicBlock *Entry = MI.getParent(); - MachineFunction *MF = Entry->getParent(); - - // We know that the MTVRSAVE instruction immediately follows MI. Remove it. - MachineBasicBlock::iterator MBBI = MI; - ++MBBI; - assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); - MBBI->eraseFromParent(); - - bool RemovedAllMTVRSAVEs = true; - // See if we can find and remove the MTVRSAVE instruction from all of the - // epilog blocks. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - // If last instruction is a return instruction, add an epilogue - if (I->isReturnBlock()) { - bool FoundIt = false; - for (MBBI = I->end(); MBBI != I->begin(); ) { - --MBBI; - if (MBBI->getOpcode() == PPC::MTVRSAVE) { - MBBI->eraseFromParent(); // remove it. - FoundIt = true; - break; - } - } - RemovedAllMTVRSAVEs &= FoundIt; - } - } - - // If we found and removed all MTVRSAVE instructions, remove the read of - // VRSAVE as well. - if (RemovedAllMTVRSAVEs) { - MBBI = MI; - assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); - --MBBI; - assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); - MBBI->eraseFromParent(); - } - - // Finally, nuke the UPDATE_VRSAVE. - MI.eraseFromParent(); -} - -// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the -// instruction selector. Based on the vector registers that have been used, -// transform this into the appropriate ORI instruction. -static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { - MachineFunction *MF = MI.getParent()->getParent(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - DebugLoc dl = MI.getDebugLoc(); - - const MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned UsedRegMask = 0; - for (unsigned i = 0; i != 32; ++i) - if (MRI.isPhysRegModified(VRRegNo[i])) - UsedRegMask |= 1 << (31-i); - - // Live in and live out values already must be in the mask, so don't bother - // marking them. - for (std::pair LI : MF->getRegInfo().liveins()) { - unsigned RegNo = TRI->getEncodingValue(LI.first); - if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. - } - - // Live out registers appear as use operands on return instructions. - for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); - UsedRegMask != 0 && BI != BE; ++BI) { - const MachineBasicBlock &MBB = *BI; - if (!MBB.isReturnBlock()) - continue; - const MachineInstr &Ret = MBB.back(); - for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = Ret.getOperand(I); - if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) - continue; - unsigned RegNo = TRI->getEncodingValue(MO.getReg()); - UsedRegMask &= ~(1 << (31-RegNo)); - } - } - - // If no registers are used, turn this into a copy. - if (UsedRegMask == 0) { - // Remove all VRSAVE code. - RemoveVRSaveCode(MI); - return; - } - - Register SrcReg = MI.getOperand(1).getReg(); - Register DstReg = MI.getOperand(0).getReg(); - - if ((UsedRegMask & 0xFFFF) == UsedRegMask) { - if (DstReg != SrcReg) - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask); - else - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask); - } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { - if (DstReg != SrcReg) - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask >> 16); - else - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask >> 16); - } else { - if (DstReg != SrcReg) - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask >> 16); - else - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask >> 16); - - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(DstReg, RegState::Kill) - .addImm(UsedRegMask & 0xFFFF); - } - - // Remove the old UPDATE_VRSAVE instruction. - MI.eraseFromParent(); -} - static bool spillsCR(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo(); return FuncInfo->isCRSpilled(); } -static bool spillsVRSAVE(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo(); - return FuncInfo->isVRSAVESpilled(); -} - static bool hasSpills(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo(); return FuncInfo->hasSpills(); @@ -474,7 +332,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, !FI->mustSaveTOC() && // No need to save TOC. !RegInfo->hasBasePointer(MF); // No special alignment. - // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless + // Note: for PPC32 SVR4ABI, we can still generate stackless // code if all local vars are reg-allocated. bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); @@ -775,21 +633,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool isELFv2ABI = Subtarget.isELFv2ABI(); assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); - // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, - // process it. - if (!isSVR4ABI) - for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { - if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { - if (isAIXABI) - report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); - HandleVRSaveUpdate(*MBBI, TII); - break; - } - } - - // Move MBBI back to the beginning of the prologue block. - MBBI = MBB.begin(); - // Work out frame sizes. unsigned FrameSize = determineFrameLayoutAndUpdate(MF); int NegFrameSize = -FrameSize; @@ -2035,7 +1878,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; - bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; SmallVector GPRegs; @@ -2075,8 +1917,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { ; // do nothing, as we already know whether CRs are spilled - } else if (PPC::VRSAVERCRegClass.contains(Reg)) { - HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg) || PPC::SPERCRegClass.contains(Reg)) { // Altivec and SPE are mutually exclusive, but have the same stack @@ -2199,23 +2039,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, LowerBound -= 4; // The CR save area is always 4 bytes long. } - if (HasVRSAVESaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the VRSAVE register class? - // Adjust the frame index of the VRSAVE spill slot. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - - if (PPC::VRSAVERCRegClass.contains(Reg)) { - int FI = CSI[i].getFrameIdx(); - - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); - } - } - - LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. - } - // Both Altivec and SPE have the same alignment and padding requirements // within the stack frame. if (HasVRSaveArea) { @@ -2255,8 +2078,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // needed alignment padding. unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || - hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { + if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { const TargetRegisterClass &GPRC = PPC::GPRCRegClass; const TargetRegisterClass &G8RC = PPC::G8RCRegClass; const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; @@ -2270,7 +2093,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); // These kinds of spills might need two registers. - if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) + if (spillsCR(MF) || HasAlVars) RS->addScavengingFrameIndex( MFI.CreateStackObject(Size, Alignment, false)); } @@ -2347,9 +2170,6 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. - if (Reg == PPC::VRSAVE) - continue; // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; @@ -2514,10 +2334,6 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters( for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. - if (Reg == PPC::VRSAVE) - continue; - if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) continue; diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 78c1827f4dc74..aece8a7d5182e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -156,9 +156,6 @@ namespace { PPCLowering = Subtarget->getTargetLowering(); SelectionDAGISel::runOnMachineFunction(MF); - if (!Subtarget->isSVR4ABI()) - InsertVRSaveCode(MF); - return true; } @@ -341,8 +338,6 @@ namespace { return true; } - void InsertVRSaveCode(MachineFunction &MF); - StringRef getPassName() const override { return "PowerPC DAG->DAG Pattern Instruction Selection"; } @@ -376,70 +371,6 @@ namespace { } // end anonymous namespace -/// InsertVRSaveCode - Once the entire function has been instruction selected, -/// all virtual registers are created and all machine instructions are built, -/// check to see if we need to save/restore VRSAVE. If so, do it. -void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { - // Check to see if this function uses vector registers, which means we have to - // save and restore the VRSAVE register and update it with the regs we use. - // - // In this case, there will be virtual registers of vector type created - // by the scheduler. Detect them now. - bool HasVectorVReg = false; - for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); - if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { - HasVectorVReg = true; - break; - } - } - if (!HasVectorVReg) return; // nothing to do. - - // If we have a vector register, we want to emit code into the entry and exit - // blocks to save and restore the VRSAVE register. We do this here (instead - // of marking all vector instructions as clobbering VRSAVE) for two reasons: - // - // 1. This (trivially) reduces the load on the register allocator, by not - // having to represent the live range of the VRSAVE register. - // 2. This (more significantly) allows us to create a temporary virtual - // register to hold the saved VRSAVE value, allowing this temporary to be - // register allocated, instead of forcing it to be spilled to the stack. - - // Create two vregs - one to hold the VRSAVE register that is live-in to the - // function and one for the value after having bits or'd into it. - Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - - const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); - MachineBasicBlock &EntryBB = *Fn.begin(); - DebugLoc dl; - // Emit the following code into the entry block: - // InVRSAVE = MFVRSAVE - // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE - // MTVRSAVE UpdatedVRSAVE - MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point - BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); - BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), - UpdatedVRSAVE).addReg(InVRSAVE); - BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); - - // Find all return blocks, outputting a restore in each epilog. - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (BB->isReturnBlock()) { - IP = BB->end(); --IP; - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = IP; - while (I2 != BB->begin() && (--I2)->isTerminator()) - IP = I2; - - // Emit: MTVRSAVE InVRSave - BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); - } - } -} - /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. /// diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 236aca230477b..75b5ec9ec13ae 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7660,6 +7660,10 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue Arg = OutVals[RealResIdx]; + if (Subtarget.isAIXABI() && + (VA.getLocVT().isVector() || VA.getValVT().isVector())) + report_fatal_error("Returning vector types not yet supported on AIX."); + switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 3aac3de395510..469487eb6f7f6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1374,8 +1374,6 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) { OpcodeIndex = SOK_VectorFloat8Spill; } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_VectorFloat4Spill; - } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VRSaveSpill; } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SpillToVSR; } else { @@ -1414,9 +1412,6 @@ void PPCInstrInfo::StoreRegToStackSlot( PPC::CRBITRCRegClass.hasSubClassEq(RC)) FuncInfo->setSpillsCR(); - if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) - FuncInfo->setSpillsVRSAVE(); - if (isXFormMemOp(Opcode)) FuncInfo->setHasNonRISpills(); } @@ -1472,9 +1467,6 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, PPC::CRBITRCRegClass.hasSubClassEq(RC)) FuncInfo->setSpillsCR(); - if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) - FuncInfo->setSpillsVRSAVE(); - if (isXFormMemOp(Opcode)) FuncInfo->setHasNonRISpills(); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 77ee236020a8a..e3e87022e97c3 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -122,7 +122,6 @@ enum SpillOpcodeKey { SOK_VSXVectorSpill, SOK_VectorFloat8Spill, SOK_VectorFloat4Spill, - SOK_VRSaveSpill, SOK_SpillToVSR, SOK_SPESpill, SOK_LastOpcodeSpill // This must be last on the enum. @@ -133,20 +132,20 @@ enum SpillOpcodeKey { { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \ - PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD, PPC::EVLDD \ + PPC::SPILLTOVSR_LD, PPC::EVLDD \ } #define Pwr9LoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ - PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD \ + PPC::DFLOADf32, PPC::SPILLTOVSR_LD \ } #define Pwr8StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ - PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \ + PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \ PPC::SPILLTOVSR_ST, PPC::EVSTDD \ } @@ -154,7 +153,7 @@ enum SpillOpcodeKey { { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ - PPC::SPILL_VRSAVE, PPC::SPILLTOVSR_ST \ + PPC::SPILLTOVSR_ST \ } // Initialize arrays for load and store spill opcodes on supported subtargets. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f10176e658440..ddb8c25b6a7fc 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1439,10 +1439,7 @@ def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2 "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } - -def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS), - "UPDATE_VRSAVE $rD, $rS", []>; -} +} // hasCtrlDep let Defs = [R1], Uses = [R1] in def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", @@ -2919,18 +2916,6 @@ let isCodeGenOnly = 1 in { def : InstAlias<"mtvrsave $rS", (MTVRSAVE gprc:$rS)>; def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>; -// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, -// so we'll need to scavenge a register for it. -let mayStore = 1 in -def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), - "#SPILL_VRSAVE", []>; - -// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously -// spilled), so we'll need to scavenge a register for it. -let mayLoad = 1 in -def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), - "#RESTORE_VRSAVE", []>; - let hasSideEffects = 0 in { // mtocrf's input needs to be prepared by shifting by an amount dependent // on the cr register selected. Thus, post-ra anti-dep breaking must not diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 29ca53e273d75..01dff9a1befc3 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -69,9 +69,6 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// disabled. bool DisableNonVolatileCR = false; - /// Indicates whether VRSAVE is spilled in the current function. - bool SpillsVRSAVE = false; - /// LRStoreRequired - The bool indicates whether there is some explicit use of /// the LR/LR8 stack slot that is not obvious from scanning the code. This /// requires that the code generator produce a store of LR to the stack on @@ -175,9 +172,6 @@ class PPCFunctionInfo : public MachineFunctionInfo { void setDisableNonVolatileCR() { DisableNonVolatileCR = true; } bool isNonVolatileCRDisabled() const { return DisableNonVolatileCR; } - void setSpillsVRSAVE() { SpillsVRSAVE = true; } - bool isVRSAVESpilled() const { return SpillsVRSAVE; } - void setLRStoreRequired() { LRStoreRequired = true; } bool isLRStoreRequired() const { return LRStoreRequired; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 96666ad58dfe5..2d7545a68f575 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -926,59 +926,6 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, MBB.erase(II); } -void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex) const { - // Get the instruction. - MachineInstr &MI = *II; // ; SPILL_VRSAVE , - // Get the instruction's basic block. - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - const PPCSubtarget &Subtarget = MF.getSubtarget(); - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - DebugLoc dl = MI.getDebugLoc(); - - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); - Register SrcReg = MI.getOperand(0).getReg(); - - BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) - .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); - - addFrameReference( - BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill), - FrameIndex); - - // Discard the pseudo instruction. - MBB.erase(II); -} - -void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, - unsigned FrameIndex) const { - // Get the instruction. - MachineInstr &MI = *II; // ; = RESTORE_VRSAVE - // Get the instruction's basic block. - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - const PPCSubtarget &Subtarget = MF.getSubtarget(); - const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - DebugLoc dl = MI.getDebugLoc(); - - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); - Register DestReg = MI.getOperand(0).getReg(); - assert(MI.definesRegister(DestReg) && - "RESTORE_VRSAVE does not define its destination"); - - addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), - Reg), FrameIndex); - - BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) - .addReg(Reg, RegState::Kill); - - // Discard the pseudo instruction. - MBB.erase(II); -} - bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, Register Reg, int &FrameIdx) const { // For the nonvolatile condition registers (CR2, CR3, CR4) return true to @@ -1110,12 +1057,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if (OpC == PPC::RESTORE_CRBIT) { lowerCRBitRestore(II, FrameIndex); return; - } else if (OpC == PPC::SPILL_VRSAVE) { - lowerVRSAVESpilling(II, FrameIndex); - return; - } else if (OpC == PPC::RESTORE_VRSAVE) { - lowerVRSAVERestore(II, FrameIndex); - return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 155ef58d5dde9..e14fee9343a89 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -119,10 +119,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { unsigned FrameIndex) const; void lowerCRBitRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; - void lowerVRSAVESpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex) const; - void lowerVRSAVERestore(MachineBasicBlock::iterator II, - unsigned FrameIndex) const; bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg, int &FrameIdx) const override; diff --git a/llvm/lib/Target/PowerPC/README_ALTIVEC.txt b/llvm/lib/Target/PowerPC/README_ALTIVEC.txt index c38e019231611..6d32e76ed8d6c 100644 --- a/llvm/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/llvm/lib/Target/PowerPC/README_ALTIVEC.txt @@ -39,11 +39,6 @@ a load/store/lve*x sequence. //===----------------------------------------------------------------------===// -For functions that use altivec AND have calls, we are VRSAVE'ing all call -clobbered regs. - -//===----------------------------------------------------------------------===// - Implement passing vectors by value into calls and receiving them as arguments. //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-return.ll b/llvm/test/CodeGen/PowerPC/aix-vector-return.ll index 6c41005028ce6..b4e39d6c05fe1 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-return.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-return.ll @@ -4,7 +4,7 @@ ; RUN: not --crash llc --verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff \ ; RUN: -mattr=+altivec 2>&1 < %s | FileCheck %s -; CHECK: LLVM ERROR: UPDATE_VRSAVE is unexpected on AIX. +; CHECK: LLVM ERROR: Returning vector types not yet supported on AIX. define dso_local <4 x i32> @test() local_unnamed_addr #0 { entry: From 43d239d0fadb1f8ea297580ca39dfbee96c913c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Hjort=20=C3=85kerlund?= Date: Wed, 30 Sep 2020 15:24:41 +0200 Subject: [PATCH 158/544] [GlobalISel] Fix incorrect setting of ValNo when splitting Before, for each original argument i, ValNo was set to i + PartIdx, but ValNo is intended to reflect the index of the value before splitting. Hence, ValNo should always be set to i and not consider the PartIdx. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D86511 --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 49d101a81e933..2e2cb575a4b98 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -285,7 +285,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } Args[i].Regs.push_back(Reg); Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full, + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], Args[i].Flags[Part], CCInfo)) { // Still couldn't assign this smaller part type for some reason. return false; @@ -318,7 +318,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full, + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], Args[i].Flags[PartIdx], CCInfo)) return false; } From f33f8a2b30325d89c4b7daef1b7d11d6da38fd56 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 30 Sep 2020 13:26:25 +0200 Subject: [PATCH 159/544] Move AffineMapAttr into BaseOps.td AffineMapAttr is already part of base, it's just impossible to refer to it from ODS without pulling in the definition from Affine dialect. Differential Revision: https://reviews.llvm.org/D88555 --- .../mlir/Dialect/Affine/IR/AffineOps.td | 1 - .../mlir/Dialect/Affine/IR/AffineOpsBase.td | 32 ------------------- .../Dialect/GPU/ParallelLoopMapperAttr.td | 1 - .../mlir/Dialect/Linalg/IR/LinalgOps.td | 1 - .../Dialect/Linalg/IR/LinalgStructuredOps.td | 1 - mlir/include/mlir/Dialect/Vector/VectorOps.td | 1 - mlir/include/mlir/IR/OpBase.td | 14 ++++++++ mlir/test/lib/Dialect/Test/TestOps.td | 1 - 8 files changed, 14 insertions(+), 38 deletions(-) delete mode 100644 mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index c47dcd3d5fe2d..7e065cc38f69b 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -13,7 +13,6 @@ #ifndef AFFINE_OPS #define AFFINE_OPS -include "mlir/Dialect/Affine/IR/AffineOpsBase.td" include "mlir/Dialect/StandardOps/IR/StandardOpsBase.td" include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td deleted file mode 100644 index 2883072d4aa98..0000000000000 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOpsBase.td +++ /dev/null @@ -1,32 +0,0 @@ -//===- AffineOpsBase.td - Affine operation definitions -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines base support for MLIR affine operations. -// -//===----------------------------------------------------------------------===// - -#ifndef AFFINE_OPS_BASE -#define AFFINE_OPS_BASE - -include "mlir/IR/OpBase.td" - -// Attributes containing affine maps. -def AffineMapAttr : Attr< - CPred<"$_self.isa()">, "AffineMap attribute"> { - let storageType = [{ AffineMapAttr }]; - let returnType = [{ AffineMap }]; - let valueType = Index; - let constBuilderCall = "AffineMapAttr::get($0)"; -} - -def AffineMapArrayAttr : TypedArrayAttrBase { - let constBuilderCall = "$_builder.getAffineMapArrayAttr($0)"; -} - -#endif // AFFINE_OPS_BASE diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td index c0380739d1f87..daf2d6c6286b2 100644 --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td @@ -14,7 +14,6 @@ #ifndef PARALLEL_LOOP_MAPPER_ATTR #define PARALLEL_LOOP_MAPPER_ATTR -include "mlir/Dialect/Affine/IR/AffineOpsBase.td" include "mlir/Dialect/GPU/GPUBase.td" def BlockX : I64EnumAttrCase<"BlockX", 0>; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index a7855e6327b20..d74e59145705e 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -13,7 +13,6 @@ #ifndef LINALG_OPS #define LINALG_OPS -include "mlir/Dialect/Affine/IR/AffineOpsBase.td" include "mlir/Dialect/Linalg/IR/LinalgBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/ViewLikeInterface.td" diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index d123229337370..9c8197c45ec82 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -14,7 +14,6 @@ #ifndef LINALG_STRUCTURED_OPS #define LINALG_STRUCTURED_OPS -include "mlir/Dialect/Affine/IR/AffineOpsBase.td" include "mlir/Dialect/Linalg/IR/LinalgBase.td" include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td" include "mlir/Interfaces/CopyOpInterface.td" diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index ecac0a3d4b1f3..f74c8687bf531 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -13,7 +13,6 @@ #ifndef VECTOR_OPS #define VECTOR_OPS -include "mlir/Dialect/Affine/IR/AffineOpsBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/VectorInterfaces.td" diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index f1befa4d980b5..eaaf5b75230ea 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1381,6 +1381,15 @@ def StringElementsAttr : ElementsAttrBase< let convertFromStorage = "$_self"; } +// Attributes containing affine maps. +def AffineMapAttr : Attr< +CPred<"$_self.isa<::mlir::AffineMapAttr>()">, "AffineMap attribute"> { + let storageType = [{::mlir::AffineMapAttr }]; + let returnType = [{ ::mlir::AffineMap }]; + let valueType = Index; + let constBuilderCall = "::mlir::AffineMapAttr::get($0)"; +} + // Base class for array attributes. class ArrayAttrBase : Attr { @@ -1410,6 +1419,11 @@ class TypedArrayAttrBase: ArrayAttrBase< Attr elementAttr = element; } +def AffineMapArrayAttr : TypedArrayAttrBase { + let constBuilderCall = "$_builder.getAffineMapArrayAttr($0)"; +} + def BoolArrayAttr : TypedArrayAttrBase { let constBuilderCall = "$_builder.getBoolArrayAttr($0)"; diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 6f3c8f5aee680..73610457cf7b8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -9,7 +9,6 @@ #ifndef TEST_OPS #define TEST_OPS -include "mlir/Dialect/Affine/IR/AffineOpsBase.td" include "mlir/IR/OpBase.td" include "mlir/IR/OpAsmInterface.td" include "mlir/IR/RegionKindInterface.td" From dc261d23d07cccfa7b10a3d1a43903138aee94dc Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 30 Sep 2020 16:30:18 +0200 Subject: [PATCH 160/544] [sanitizers] Fix internal__exit on Solaris `TestCases/log-path_test.cpp` currently `FAIL`s on Solaris: $ env ASAN_OPTIONS=log_path=`for((i=0;i<10000;i++)); do echo -n $i; done` ./log-path_test.cpp.tmp ==5031==ERROR: Path is too long: 01234567... Segmentation Fault (core dumped) The `SEGV` happens here: Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1 (LWP 1)] 0x00000000 in ?? () (gdb) where #0 0x00000000 in ?? () #1 0x080a1e63 in __interceptor__exit (status=1) at /vol/gcc/src/llvm/llvm/local/projects/compiler-rt/lib/asan/../sanitizer_common/sanitizer_common_interceptors.inc:3808 #2 0x08135ea8 in __sanitizer::internal__exit (exitcode=1) at /vol/gcc/src/llvm/llvm/local/projects/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cc:139 when `__interceptor__exit` tries to call `__interception::real__exit` which is `NULL` at this point because the interceptors haven't been initialized yet. Ultimately, the problem lies elsewhere, however: `internal__exit` in `sanitizer_solaris.cpp` calls `_exit` itself since there doesn't exit a non-intercepted version in `libc`. Using the `syscall` interface instead isn't usually an option on Solaris because that interface isn't stable. However, in the case of `SYS_exit` it can be used nonetheless: `SYS_exit` has remained unchanged since at least Solaris 2.5.1 in 1996, and this is what this patch does. Tested on `amd64-pc-solaris2.11`. Differential Revision: https://reviews.llvm.org/D88404 --- .../lib/sanitizer_common/sanitizer_linux.cpp | 20 ++++++++++--------- .../sanitizer_common/sanitizer_solaris.cpp | 4 ---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 0b53210b80773..0e48062828a4b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -426,15 +426,6 @@ uptr internal_sched_yield() { return internal_syscall(SYSCALL(sched_yield)); } -void internal__exit(int exitcode) { -#if SANITIZER_FREEBSD || SANITIZER_OPENBSD - internal_syscall(SYSCALL(exit), exitcode); -#else - internal_syscall(SYSCALL(exit_group), exitcode); -#endif - Die(); // Unreachable. -} - unsigned int internal_sleep(unsigned int seconds) { struct timespec ts; ts.tv_sec = seconds; @@ -451,6 +442,17 @@ uptr internal_execve(const char *filename, char *const argv[], } #endif // !SANITIZER_SOLARIS && !SANITIZER_NETBSD +#if !SANITIZER_NETBSD +void internal__exit(int exitcode) { +#if SANITIZER_FREEBSD || SANITIZER_OPENBSD || SANITIZER_SOLARIS + internal_syscall(SYSCALL(exit), exitcode); +#else + internal_syscall(SYSCALL(exit_group), exitcode); +#endif + Die(); // Unreachable. +} +#endif // !SANITIZER_NETBSD + // ----------------- sanitizer_common.h bool FileExists(const char *filename) { if (ShouldMockFailureToOpen(filename)) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp index 7f9a3e936da72..8789dcd10a954 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp @@ -160,10 +160,6 @@ DECLARE__REAL_AND_INTERNAL(uptr, sched_yield, void) { return sched_yield(); } -DECLARE__REAL_AND_INTERNAL(void, _exit, int exitcode) { - _exit(exitcode); -} - DECLARE__REAL_AND_INTERNAL(uptr, execve, const char *filename, char *const argv[], char *const envp[]) { return _REAL(execve)(filename, argv, envp); From 944691f0b7fa8d99790a4544545e55f014c37295 Mon Sep 17 00:00:00 2001 From: Xiangling Liao Date: Wed, 30 Sep 2020 09:52:41 -0400 Subject: [PATCH 161/544] [NFC][FE] Replace TypeSize with StorageUnitSize On some targets like AIX, last bitfield size is not always equal to last bitfield type size. Some bitfield like bool will have the same alignment as [unsigned]. So we'd like to use a more general term `StorageUnit` to replace type in this field. Differential Revision: https://reviews.llvm.org/D88260 --- clang/lib/AST/RecordLayoutBuilder.cpp | 60 ++++++++++++++------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 715b629e290de..1c185bb082125 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -622,9 +622,10 @@ class ItaniumRecordLayoutBuilder { /// an adjacent bitfield if necessary. The unit in question is usually /// a byte, but larger units are used if IsMsStruct. unsigned char UnfilledBitsInLastUnit; - /// LastBitfieldTypeSize - If IsMsStruct, represents the size of the type - /// of the previous field if it was a bitfield. - unsigned char LastBitfieldTypeSize; + + /// LastBitfieldStorageUnitSize - If IsMsStruct, represents the size of the + /// storage unit of the previous field if it was a bitfield. + unsigned char LastBitfieldStorageUnitSize; /// MaxFieldAlignment - The maximum allowed field alignment. This is set by /// #pragma pack. @@ -693,7 +694,7 @@ class ItaniumRecordLayoutBuilder { UnadjustedAlignment(CharUnits::One()), UseExternalLayout(false), InferAlignment(false), Packed(false), IsUnion(false), IsMac68kAlign(false), IsMsStruct(false), UnfilledBitsInLastUnit(0), - LastBitfieldTypeSize(0), MaxFieldAlignment(CharUnits::Zero()), + LastBitfieldStorageUnitSize(0), MaxFieldAlignment(CharUnits::Zero()), DataSize(0), NonVirtualSize(CharUnits::Zero()), NonVirtualAlignment(CharUnits::One()), PreferredNVAlignment(CharUnits::One()), @@ -708,7 +709,7 @@ class ItaniumRecordLayoutBuilder { void LayoutFields(const RecordDecl *D); void LayoutField(const FieldDecl *D, bool InsertExtraPadding); - void LayoutWideBitField(uint64_t FieldSize, uint64_t TypeSize, + void LayoutWideBitField(uint64_t FieldSize, uint64_t StorageUnitSize, bool FieldPacked, const FieldDecl *D); void LayoutBitField(const FieldDecl *D); @@ -1451,7 +1452,7 @@ roundUpSizeToCharAlignment(uint64_t Size, } void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize, - uint64_t TypeSize, + uint64_t StorageUnitSize, bool FieldPacked, const FieldDecl *D) { assert(Context.getLangOpts().CPlusPlus && @@ -1481,7 +1482,7 @@ void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize, // We're not going to use any of the unfilled bits in the last byte. UnfilledBitsInLastUnit = 0; - LastBitfieldTypeSize = 0; + LastBitfieldStorageUnitSize = 0; uint64_t FieldOffset; uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit; @@ -1520,7 +1521,7 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { bool FieldPacked = Packed || D->hasAttr(); uint64_t FieldSize = D->getBitWidthValue(Context); TypeInfo FieldInfo = Context.getTypeInfo(D->getType()); - uint64_t TypeSize = FieldInfo.Width; + uint64_t StorageUnitSize = FieldInfo.Width; unsigned FieldAlign = FieldInfo.Align; // UnfilledBitsInLastUnit is the difference between the end of the @@ -1529,7 +1530,7 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // first bit offset available for non-bitfields). The current data // size in bits is always a multiple of the char size; additionally, // for ms_struct records it's also a multiple of the - // LastBitfieldTypeSize (if set). + // LastBitfieldStorageUnitSize (if set). // The struct-layout algorithm is dictated by the platform ABI, // which in principle could use almost any rules it likes. In @@ -1583,26 +1584,26 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // First, some simple bookkeeping to perform for ms_struct structs. if (IsMsStruct) { // The field alignment for integer types is always the size. - FieldAlign = TypeSize; + FieldAlign = StorageUnitSize; // If the previous field was not a bitfield, or was a bitfield // with a different storage unit size, or if this field doesn't fit into // the current storage unit, we're done with that storage unit. - if (LastBitfieldTypeSize != TypeSize || + if (LastBitfieldStorageUnitSize != StorageUnitSize || UnfilledBitsInLastUnit < FieldSize) { // Also, ignore zero-length bitfields after non-bitfields. - if (!LastBitfieldTypeSize && !FieldSize) + if (!LastBitfieldStorageUnitSize && !FieldSize) FieldAlign = 1; UnfilledBitsInLastUnit = 0; - LastBitfieldTypeSize = 0; + LastBitfieldStorageUnitSize = 0; } } // If the field is wider than its declared type, it follows // different rules in all cases. - if (FieldSize > TypeSize) { - LayoutWideBitField(FieldSize, TypeSize, FieldPacked, D); + if (FieldSize > StorageUnitSize) { + LayoutWideBitField(FieldSize, StorageUnitSize, FieldPacked, D); return; } @@ -1686,7 +1687,7 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // Compute the real offset. if (FieldSize == 0 || (AllowPadding && - (FieldOffset & (FieldAlign-1)) + FieldSize > TypeSize)) { + (FieldOffset & (FieldAlign - 1)) + FieldSize > StorageUnitSize)) { FieldOffset = llvm::alignTo(FieldOffset, FieldAlign); } else if (ExplicitFieldAlign && (MaxFieldAlignmentInBits == 0 || @@ -1700,7 +1701,8 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // Repeat the computation for diagnostic purposes. if (FieldSize == 0 || (AllowPadding && - (UnpackedFieldOffset & (UnpackedFieldAlign-1)) + FieldSize > TypeSize)) + (UnpackedFieldOffset & (UnpackedFieldAlign - 1)) + FieldSize > + StorageUnitSize)) UnpackedFieldOffset = llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign); else if (ExplicitFieldAlign && @@ -1741,11 +1743,11 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // is a zero-width bitfield, in which case just use a size of 1. uint64_t RoundedFieldSize; if (IsMsStruct) { - RoundedFieldSize = - (FieldSize ? TypeSize : Context.getTargetInfo().getCharWidth()); + RoundedFieldSize = (FieldSize ? StorageUnitSize + : Context.getTargetInfo().getCharWidth()); - // Otherwise, allocate just the number of bytes required to store - // the bitfield. + // Otherwise, allocate just the number of bytes required to store + // the bitfield. } else { RoundedFieldSize = roundUpSizeToCharAlignment(FieldSize, Context); } @@ -1757,15 +1759,15 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // We should have cleared UnfilledBitsInLastUnit in every case // where we changed storage units. if (!UnfilledBitsInLastUnit) { - setDataSize(FieldOffset + TypeSize); - UnfilledBitsInLastUnit = TypeSize; + setDataSize(FieldOffset + StorageUnitSize); + UnfilledBitsInLastUnit = StorageUnitSize; } UnfilledBitsInLastUnit -= FieldSize; - LastBitfieldTypeSize = TypeSize; + LastBitfieldStorageUnitSize = StorageUnitSize; - // Otherwise, bump the data size up to include the bitfield, - // including padding up to char alignment, and then remember how - // bits we didn't use. + // Otherwise, bump the data size up to include the bitfield, + // including padding up to char alignment, and then remember how + // bits we didn't use. } else { uint64_t NewSizeInBits = FieldOffset + FieldSize; uint64_t CharAlignment = Context.getTargetInfo().getCharAlign(); @@ -1775,7 +1777,7 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { // The only time we can get here for an ms_struct is if this is a // zero-width bitfield, which doesn't count as anything for the // purposes of unfilled bits. - LastBitfieldTypeSize = 0; + LastBitfieldStorageUnitSize = 0; } // Update the size. @@ -1825,7 +1827,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit; // Reset the unfilled bits. UnfilledBitsInLastUnit = 0; - LastBitfieldTypeSize = 0; + LastBitfieldStorageUnitSize = 0; bool FieldPacked = Packed || D->hasAttr(); From 89baeaef2fa9a2441d087a218ac82e11a5d4e548 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 22 Sep 2020 08:55:54 -0400 Subject: [PATCH 162/544] Reapply "RegAllocFast: Rewrite and improve" This reverts commit 73a6a164b84a8195defbb8f5eeb6faecfc478ad4. --- .../SymbolFile/NativePDB/disassembly.cpp | 8 +- llvm/lib/CodeGen/RegAllocFast.cpp | 1272 +++-- .../builtin-return-address-pacret.ll | 58 +- .../GlobalISel/darwin-tls-call-clobber.ll | 5 +- .../CodeGen/AArch64/arm64-fast-isel-br.ll | 2 +- .../CodeGen/AArch64/arm64-fast-isel-call.ll | 3 +- .../arm64-fast-isel-conversion-fallback.ll | 34 +- .../AArch64/arm64-fast-isel-conversion.ll | 56 +- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll | 23 +- .../test/CodeGen/AArch64/arm64_32-fastisel.ll | 5 +- llvm/test/CodeGen/AArch64/arm64_32-null.ll | 7 +- .../test/CodeGen/AArch64/br-cond-not-merge.ll | 2 +- llvm/test/CodeGen/AArch64/cmpxchg-O0.ll | 38 +- llvm/test/CodeGen/AArch64/combine-loads.ll | 8 +- .../test/CodeGen/AArch64/fast-isel-cmpxchg.ll | 39 +- llvm/test/CodeGen/AArch64/popcount.ll | 62 +- llvm/test/CodeGen/AArch64/swift-return.ll | 16 +- llvm/test/CodeGen/AArch64/swifterror.ll | 45 +- .../AArch64/unwind-preserved-from-mir.mir | 10 +- llvm/test/CodeGen/AArch64/unwind-preserved.ll | 32 +- .../CodeGen/AMDGPU/GlobalISel/inline-asm.ll | 8 +- .../AMDGPU/control-flow-fastregalloc.ll | 54 +- .../test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir | 62 + .../fastregalloc-illegal-subreg-physreg.mir | 27 + .../fastregalloc-self-loop-heuristic.mir | 25 +- .../AMDGPU/indirect-addressing-term.ll | 153 +- .../CodeGen/AMDGPU/mubuf-legalize-operands.ll | 45 +- .../AMDGPU/partial-sgpr-to-vgpr-spills.ll | 1288 +++-- .../AMDGPU/reserve-vgpr-for-sgpr-spill.ll | 8 +- llvm/test/CodeGen/AMDGPU/spill-agpr.mir | 58 +- llvm/test/CodeGen/AMDGPU/spill-m0.ll | 51 +- llvm/test/CodeGen/AMDGPU/spill192.mir | 8 +- .../AMDGPU/unexpected-reg-unit-state.mir | 32 + llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 41 +- .../CodeGen/ARM/2010-08-04-StackVariable.ll | 3 + llvm/test/CodeGen/ARM/Windows/alloca.ll | 9 +- llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll | 10 +- llvm/test/CodeGen/ARM/cmpxchg-O0.ll | 32 +- llvm/test/CodeGen/ARM/crash-greedy-v6.ll | 10 +- llvm/test/CodeGen/ARM/debug-info-blocks.ll | 3 +- llvm/test/CodeGen/ARM/fast-isel-call.ll | 52 +- llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll | 218 +- .../ARM/fast-isel-ldr-str-thumb-neg-index.ll | 69 +- llvm/test/CodeGen/ARM/fast-isel-select.ll | 32 +- llvm/test/CodeGen/ARM/fast-isel-vararg.ll | 28 +- llvm/test/CodeGen/ARM/ldrd.ll | 9 +- llvm/test/CodeGen/ARM/legalize-bitcast.ll | 28 +- llvm/test/CodeGen/ARM/pr47454.ll | 18 +- llvm/test/CodeGen/ARM/stack-guard-reassign.ll | 7 +- llvm/test/CodeGen/ARM/swifterror.ll | 38 +- llvm/test/CodeGen/ARM/thumb-big-stack.ll | 2070 ++++---- .../CodeGen/Hexagon/vect/vect-load-v4i16.ll | 18 +- llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll | 6 +- llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll | 33 +- llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll | 5 +- .../CodeGen/Mips/GlobalISel/llvm-ir/add.ll | 57 +- .../Mips/GlobalISel/llvm-ir/add_vec.ll | 24 +- .../llvm-ir/aggregate_struct_return.ll | 28 +- .../Mips/GlobalISel/llvm-ir/bitreverse.ll | 270 +- .../Mips/GlobalISel/llvm-ir/bitwise.ll | 132 +- .../CodeGen/Mips/GlobalISel/llvm-ir/branch.ll | 8 +- .../Mips/GlobalISel/llvm-ir/brindirect.ll | 10 +- .../CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll | 18 +- .../CodeGen/Mips/GlobalISel/llvm-ir/call.ll | 18 +- .../CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll | 14 +- .../CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll | 70 +- .../CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll | 86 +- .../Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll | 43 +- .../CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll | 140 +- .../GlobalISel/llvm-ir/float_constants.ll | 20 +- .../GlobalISel/llvm-ir/fptosi_and_fptoui.ll | 116 +- .../Mips/GlobalISel/llvm-ir/global_address.ll | 7 +- .../GlobalISel/llvm-ir/global_address_pic.ll | 12 +- .../CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll | 104 +- .../GlobalISel/llvm-ir/jump_table_and_brjt.ll | 202 +- .../GlobalISel/llvm-ir/load_4_unaligned.ll | 20 +- .../load_split_because_of_memsize_or_align.ll | 336 +- .../llvm-ir/long_ambiguous_chain_s32.ll | 502 +- .../llvm-ir/long_ambiguous_chain_s64.ll | 500 +- .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll | 152 +- .../Mips/GlobalISel/llvm-ir/mul_vec.ll | 24 +- .../CodeGen/Mips/GlobalISel/llvm-ir/phi.ll | 117 +- .../Mips/GlobalISel/llvm-ir/rem_and_div.ll | 96 +- .../CodeGen/Mips/GlobalISel/llvm-ir/select.ll | 73 +- .../GlobalISel/llvm-ir/sitofp_and_uitofp.ll | 96 +- .../GlobalISel/llvm-ir/store_4_unaligned.ll | 16 +- ...store_split_because_of_memsize_or_align.ll | 96 +- .../CodeGen/Mips/GlobalISel/llvm-ir/sub.ll | 65 +- .../Mips/GlobalISel/llvm-ir/sub_vec.ll | 24 +- .../GlobalISel/llvm-ir/test_TypeInfoforMF.ll | 25 +- .../Mips/GlobalISel/llvm-ir/var_arg.ll | 22 +- .../llvm-ir/zextLoad_and_sextLoad.ll | 5 +- .../Mips/GlobalISel/llvm-ir/zext_and_sext.ll | 4 +- llvm/test/CodeGen/Mips/atomic-min-max.ll | 4704 ++++++++--------- llvm/test/CodeGen/Mips/atomic.ll | 1537 +++--- llvm/test/CodeGen/Mips/atomic64.ll | 90 +- llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll | 64 +- llvm/test/CodeGen/Mips/copy-fp64.ll | 2 +- llvm/test/CodeGen/Mips/implicit-sret.ll | 57 +- llvm/test/CodeGen/Mips/micromips-eva.mir | 20 +- llvm/test/CodeGen/Mips/msa/ldr_str.ll | 84 +- llvm/test/CodeGen/PowerPC/addegluecrash.ll | 41 +- .../aggressive-anti-dep-breaker-subreg.ll | 2 +- llvm/test/CodeGen/PowerPC/aix-overflow-toc.py | 44 +- llvm/test/CodeGen/PowerPC/anon_aggr.ll | 12 +- .../CodeGen/PowerPC/builtins-ppc-p10vsx.ll | 36 +- llvm/test/CodeGen/PowerPC/elf-common.ll | 12 +- llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll | 24 +- .../CodeGen/PowerPC/fp-int128-fp-combine.ll | 3 +- .../CodeGen/PowerPC/fp-strict-fcmp-noopt.ll | 41 +- llvm/test/CodeGen/PowerPC/fp64-to-int16.ll | 5 +- .../CodeGen/PowerPC/p9-vinsert-vextract.ll | 1188 +++-- llvm/test/CodeGen/PowerPC/popcount.ll | 44 +- llvm/test/CodeGen/PowerPC/spill-nor0.ll | 6 + llvm/test/CodeGen/PowerPC/spill-nor0.mir | 17 + .../CodeGen/PowerPC/stack-guard-reassign.ll | 11 +- llvm/test/CodeGen/PowerPC/vsx-args.ll | 12 +- llvm/test/CodeGen/PowerPC/vsx.ll | 198 +- llvm/test/CodeGen/SPARC/fp16-promote.ll | 59 +- llvm/test/CodeGen/SystemZ/swift-return.ll | 6 +- llvm/test/CodeGen/SystemZ/swifterror.ll | 30 +- .../Thumb2/LowOverheadLoops/branch-targets.ll | 4 +- llvm/test/CodeGen/Thumb2/high-reg-spill.mir | 6 +- llvm/test/CodeGen/Thumb2/mve-vector-spill.ll | 50 +- .../CodeGen/X86/2009-04-14-IllegalRegs.ll | 21 +- .../X86/2010-06-28-FastAllocTiedOperand.ll | 9 +- .../X86/2013-10-14-FastISel-incorrect-vreg.ll | 46 +- llvm/test/CodeGen/X86/atomic-monotonic.ll | 8 +- llvm/test/CodeGen/X86/atomic-unordered.ll | 263 +- llvm/test/CodeGen/X86/atomic32.ll | 404 +- llvm/test/CodeGen/X86/atomic64.ll | 679 ++- llvm/test/CodeGen/X86/atomic6432.ll | 580 +- llvm/test/CodeGen/X86/avx-load-store.ll | 54 +- .../CodeGen/X86/avx512-mask-zext-bugfix.ll | 41 +- .../CodeGen/X86/bug47278-eflags-error.mir | 78 + llvm/test/CodeGen/X86/bug47278.mir | 45 + llvm/test/CodeGen/X86/crash-O0.ll | 28 +- .../CodeGen/X86/extend-set-cc-uses-dbg.ll | 3 +- llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll | 2 +- .../test/CodeGen/X86/fast-isel-nontemporal.ll | 170 +- llvm/test/CodeGen/X86/fast-isel-select-sse.ll | 120 +- llvm/test/CodeGen/X86/fast-isel-select.ll | 8 +- llvm/test/CodeGen/X86/fast-isel-x86-64.ll | 4 +- llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll | 145 +- llvm/test/CodeGen/X86/mixed-ptr-sizes.ll | 64 +- .../CodeGen/X86/phys-reg-local-regalloc.ll | 15 +- llvm/test/CodeGen/X86/pr11415.ll | 7 +- llvm/test/CodeGen/X86/pr1489.ll | 27 +- llvm/test/CodeGen/X86/pr27591.ll | 12 +- llvm/test/CodeGen/X86/pr30430.ll | 122 +- llvm/test/CodeGen/X86/pr30813.ll | 5 +- llvm/test/CodeGen/X86/pr32241.ll | 16 +- llvm/test/CodeGen/X86/pr32284.ll | 85 +- llvm/test/CodeGen/X86/pr32340.ll | 28 +- llvm/test/CodeGen/X86/pr32345.ll | 54 +- llvm/test/CodeGen/X86/pr32451.ll | 20 +- llvm/test/CodeGen/X86/pr32484.ll | 4 +- llvm/test/CodeGen/X86/pr34592.ll | 72 +- llvm/test/CodeGen/X86/pr34653.ll | 82 +- llvm/test/CodeGen/X86/pr39733.ll | 14 +- llvm/test/CodeGen/X86/pr42452.ll | 8 +- llvm/test/CodeGen/X86/pr44749.ll | 30 +- llvm/test/CodeGen/X86/pr47000.ll | 128 +- .../regalloc-fast-missing-live-out-spill.mir | 13 +- llvm/test/CodeGen/X86/stack-protector-msvc.ll | 10 +- .../stack-protector-strong-macho-win32-xor.ll | 5 +- llvm/test/CodeGen/X86/swift-return.ll | 61 +- llvm/test/CodeGen/X86/swifterror.ll | 60 +- llvm/test/CodeGen/X86/volatile.ll | 25 +- llvm/test/CodeGen/X86/win64_eh.ll | 8 +- llvm/test/CodeGen/X86/x86-32-intrcc.ll | 6 +- llvm/test/CodeGen/X86/x86-64-intrcc.ll | 6 +- llvm/test/DebugInfo/AArch64/frameindices.ll | 2 +- llvm/test/DebugInfo/AArch64/prologue_end.ll | 3 +- llvm/test/DebugInfo/ARM/prologue_end.ll | 1 - llvm/test/DebugInfo/Mips/delay-slot.ll | 6 +- llvm/test/DebugInfo/Mips/prologue_end.ll | 4 +- llvm/test/DebugInfo/X86/dbg-declare-arg.ll | 2 +- llvm/test/DebugInfo/X86/fission-ranges.ll | 32 +- llvm/test/DebugInfo/X86/op_deref.ll | 10 +- llvm/test/DebugInfo/X86/parameters.ll | 4 +- llvm/test/DebugInfo/X86/pieces-1.ll | 2 +- llvm/test/DebugInfo/X86/prologue-stack.ll | 5 +- llvm/test/DebugInfo/X86/reference-argument.ll | 2 +- .../test/DebugInfo/X86/spill-indirect-nrvo.ll | 20 +- llvm/test/DebugInfo/X86/sret.ll | 13 +- llvm/test/DebugInfo/X86/subreg.ll | 2 +- 187 files changed, 11217 insertions(+), 10830 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir create mode 100644 llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir create mode 100644 llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir create mode 100644 llvm/test/CodeGen/PowerPC/spill-nor0.mir create mode 100644 llvm/test/CodeGen/X86/bug47278-eflags-error.mir create mode 100644 llvm/test/CodeGen/X86/bug47278.mir diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp index be0575541a62a..8d101ba280e8e 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp @@ -28,11 +28,9 @@ int main(int argc, char **argv) { // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov dword ptr [rsp + 0x24], ecx // CHECK: ** 15 foo(); // CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+21>: call {{.*}} ; foo at disassembly.cpp:12 -// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor ecx, ecx -// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+28>: mov dword ptr [rsp + 0x20], eax +// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor eax, eax // CHECK: ** 16 return 0; // CHECK-NEXT: 17 } // CHECK-NEXT: 18 -// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+32>: mov eax, ecx -// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+34>: add rsp, 0x38 -// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+38>: ret +// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+28>: add rsp, 0x38 +// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+32>: ret diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 68308c6e1d4bb..cfee1a77d6b8c 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -56,6 +56,10 @@ STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); STATISTIC(NumCoalesced, "Number of copies coalesced"); +// FIXME: Remove this switch when all testcases are fixed! +static cl::opt IgnoreMissingDefs("rafast-ignore-missing-defs", + cl::Hidden); + static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); @@ -85,8 +89,9 @@ namespace { MachineInstr *LastUse = nullptr; ///< Last instr to use reg. Register VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. - unsigned short LastOpNum = 0; ///< OpNum on LastUse. - bool Dirty = false; ///< Register needs spill. + bool LiveOut = false; ///< Register is possibly live out. + bool Reloaded = false; ///< Register was reloaded. + bool Error = false; ///< Could not allocate. explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {} @@ -101,6 +106,9 @@ namespace { LiveRegMap LiveVirtRegs; DenseMap> LiveDbgValueMap; + /// List of DBG_VALUE that we encountered without the vreg being assigned + /// because they were placed after the last use of the vreg. + DenseMap> DanglingDbgValues; /// Has a bit set for every virtual register for which it was determined /// that it is alive across blocks. @@ -112,9 +120,13 @@ namespace { /// immediately without checking aliases. regFree, - /// A reserved register has been assigned explicitly (e.g., setting up a - /// call parameter), and it remains reserved until it is used. - regReserved + /// A pre-assigned register has been assigned before register allocation + /// (e.g., setting up a call parameter). + regPreAssigned, + + /// Used temporarily in reloadAtBegin() to mark register units that are + /// live-in to the basic block. + regLiveIn, /// A register state may also be a virtual register number, indication /// that the physical register is currently allocated to a virtual @@ -124,15 +136,17 @@ namespace { /// Maps each physical register to a RegUnitState enum or virtual register. std::vector RegUnitStates; - SmallVector VirtDead; SmallVector Coalesced; using RegUnitSet = SparseSet>; /// Set of register units that are used in the current instruction, and so /// cannot be allocated. RegUnitSet UsedInInstr; + RegUnitSet PhysRegUses; + SmallVector DefOperandIndexes; void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); + bool isPhysRegFree(MCPhysReg PhysReg) const; /// Mark a physreg as used in this instruction. void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -141,13 +155,29 @@ namespace { } /// Check if a physreg or any of its aliases are used in this instruction. - bool isRegUsedInInstr(MCPhysReg PhysReg) const { - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { if (UsedInInstr.count(*Units)) return true; + if (LookAtPhysRegUses && PhysRegUses.count(*Units)) + return true; + } return false; } + /// Mark physical register as being used in a register use operand. + /// This is only used by the special livethrough handling code. + void markPhysRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + PhysRegUses.insert(*Units); + } + + /// Remove mark of physical register being used in the instruction. + void unmarkRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.erase(*Units); + } + enum : unsigned { spillClean = 50, spillDirty = 100, @@ -177,27 +207,21 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void allocateBasicBlock(MachineBasicBlock &MBB); + + void addRegClassDefCounts(std::vector &RegClassDefCounts, + Register Reg) const; + void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); - void handleThroughOperands(MachineInstr &MI, - SmallVectorImpl &VirtDead); - bool isLastUseOfLocalReg(const MachineOperand &MO) const; - - void addKillFlag(const LiveReg &LRI); #ifndef NDEBUG bool verifyRegStateMapping(const LiveReg &LR) const; #endif + bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + void freePhysReg(MCPhysReg PhysReg); - void killVirtReg(LiveReg &LR); - void killVirtReg(Register VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); - void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg); - - void usePhysReg(MachineOperand &MO); - void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - unsigned NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; - void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(Register VirtReg) { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); @@ -207,14 +231,24 @@ namespace { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint); + void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint, + bool LookAtPhysRegUses = false); void allocVirtRegUndef(MachineOperand &MO); - MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, - Register Hint); - LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, - Register Hint); - void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut); - bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg, + MCPhysReg Reg); + void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg); + void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + bool LookAtPhysRegUses = false); + void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg); + + MachineBasicBlock::iterator + getMBBBeginInsertionPoint(MachineBasicBlock &MBB, + SmallSet &PrologLiveIns) const; + + void reloadAtBegin(MachineBasicBlock &MBB); + void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); Register traceCopies(Register VirtReg) const; Register traceCopyChain(Register Reg) const; @@ -243,6 +277,14 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { RegUnitStates[*UI] = NewState; } +bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != regFree) + return false; + } + return true; +} + /// This allocates space for the specified virtual register to be held on the /// stack. int RegAllocFast::getStackSpaceFor(Register VirtReg) { @@ -300,7 +342,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { // block. static const unsigned Limit = 8; unsigned C = 0; - for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { + for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) { if (UseInst.getParent() != MBB || ++C >= Limit) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); // Cannot be live-out if there are no successors. @@ -352,15 +394,19 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); ++NumStores; - // If this register is used by DBG_VALUE then insert new DBG_VALUE to - // identify spilled location as the place to find corresponding variable's - // value. + // When we spill a virtual register, we will have spill instructions behind + // every definition of it, meaning we can switch all the DBG_VALUEs over + // to just reference the stack slot. SmallVectorImpl &LRIDbgValues = LiveDbgValueMap[VirtReg]; for (MachineInstr *DBG : LRIDbgValues) { MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); + // Rewrite unassigned dbg_values to use the stack slot. + MachineOperand &MO = DBG->getOperand(0); + if (MO.isReg() && MO.getReg() == 0) + updateDbgValueForSpill(*DBG, FI); } // Now this register is spilled there is should not be any DBG_VALUE // pointing to this register because they are all pointing to spilled value @@ -379,113 +425,75 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, ++NumLoads; } -/// Return true if MO is the only remaining reference to its virtual register, -/// and it is guaranteed to be a block-local register. -bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const { - // If the register has ever been spilled or reloaded, we conservatively assume - // it is a global register used in multiple blocks. - if (StackSlotForVirtReg[MO.getReg()] != -1) - return false; - - // Check that the use/def chain has exactly one operand - MO. - MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); - if (&*I != &MO) - return false; - return ++I == MRI->reg_nodbg_end(); -} - -/// Set kill flags on last use of a virtual register. -void RegAllocFast::addKillFlag(const LiveReg &LR) { - if (!LR.LastUse) return; - MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); - if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { - if (MO.getReg() == LR.PhysReg) - MO.setIsKill(); - // else, don't do anything we are problably redefining a - // subreg of this register and given we don't track which - // lanes are actually dead, we cannot insert a kill flag here. - // Otherwise we may end up in a situation like this: - // ... = (MO) physreg:sub1, implicit killed physreg - // ... <== Here we would allow later pass to reuse physreg:sub1 - // which is potentially wrong. - // LR:sub0 = ... - // ... = LR.sub1 <== This is going to use physreg:sub1 - } -} - -#ifndef NDEBUG -bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { - for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { - if (RegUnitStates[*UI] != LR.VirtReg) - return false; - } +/// Get basic block begin insertion point. +/// This is not just MBB.begin() because surprisingly we have EH_LABEL +/// instructions marking the begin of a basic block. This means we must insert +/// new instructions after such labels... +MachineBasicBlock::iterator +RegAllocFast::getMBBBeginInsertionPoint( + MachineBasicBlock &MBB, SmallSet &PrologLiveIns) const { + MachineBasicBlock::iterator I = MBB.begin(); + while (I != MBB.end()) { + if (I->isLabel()) { + ++I; + continue; + } - return true; -} -#endif + // Most reloads should be inserted after prolog instructions. + if (!TII->isBasicBlockPrologue(*I)) + break; -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - addKillFlag(LR); - MCPhysReg PhysReg = LR.PhysReg; - setPhysRegState(PhysReg, regFree); - LR.PhysReg = 0; -} + // However if a prolog instruction reads a register that needs to be + // reloaded, the reload should be inserted before the prolog. + for (MachineOperand &MO : I->operands()) { + if (MO.isReg()) + PrologLiveIns.insert(MO.getReg()); + } -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(Register VirtReg) { - assert(Register::isVirtualRegister(VirtReg) && - "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - if (LRI != LiveVirtRegs.end() && LRI->PhysReg) - killVirtReg(*LRI); -} + ++I; + } -/// This method spills the value specified by VirtReg into the corresponding -/// stack slot if needed. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - Register VirtReg) { - assert(Register::isVirtualRegister(VirtReg) && - "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Spilling unmapped virtual register"); - spillVirtReg(MI, *LRI); + return I; } -/// Do the actual work of spilling. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - - MCPhysReg PhysReg = LR.PhysReg; +/// Reload all currently assigned virtual registers. +void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) { + if (LiveVirtRegs.empty()) + return; - if (LR.Dirty) { - // If this physreg is used by the instruction, we want to kill it on the - // instruction, not on the spill. - bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; - LR.Dirty = false; + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg Reg = P.PhysReg; + // Set state to live-in. This possibly overrides mappings to virtual + // registers but we don't care anymore at this point. + setPhysRegState(Reg, regLiveIn); + } - spill(MI, LR.VirtReg, PhysReg, SpillKill); - if (SpillKill) - LR.LastUse = nullptr; // Don't kill register again - } - killVirtReg(LR); -} + SmallSet PrologLiveIns; -/// Spill all dirty virtregs without killing them. -void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) { - if (LiveVirtRegs.empty()) - return; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. - for (LiveReg &LR : LiveVirtRegs) { - if (!LR.PhysReg) + MachineBasicBlock::iterator InsertBefore + = getMBBBeginInsertionPoint(MBB, PrologLiveIns); + for (const LiveReg &LR : LiveVirtRegs) { + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg == 0) continue; - if (OnlyLiveOut && !mayLiveOut(LR.VirtReg)) + + unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + if (RegUnitStates[FirstUnit] == regLiveIn) continue; - spillVirtReg(MI, LR); + + assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) && + "no reload in start block. Missing vreg def?"); + + if (PrologLiveIns.count(PhysReg)) { + // FIXME: Theoretically this should use an insert point skipping labels + // but I'm not sure how labels should interact with prolog instruction + // that need reloads. + reload(MBB.begin(), LR.VirtReg, PhysReg); + } else + reload(InsertBefore, LR.VirtReg, PhysReg); } LiveVirtRegs.clear(); } @@ -493,51 +501,74 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) { /// Handle the direct use of a physical register. Check that the register is /// not used by a virtreg. Kill the physreg, marking it free. This may add /// implicit kills to MO->getParent() and invalidate MO. -void RegAllocFast::usePhysReg(MachineOperand &MO) { - // Ignore undef uses. - if (MO.isUndef()) - return; - - Register PhysReg = MO.getReg(); - assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); - - markRegUsedInInstr(PhysReg); - - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (RegUnitStates[*UI]) { - case regReserved: - RegUnitStates[*UI] = regFree; - LLVM_FALLTHROUGH; - case regFree: - break; - default: - llvm_unreachable("Unexpected reg unit state"); - } - } +bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { + assert(Register::isPhysicalRegister(Reg) && "expected physreg"); + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + markRegUsedInInstr(Reg); + return displacedAny; +} - // All aliases are disabled, bring register into working set. - setPhysRegState(PhysReg, regFree); - MO.setIsKill(); +bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + return displacedAny; } /// Mark PhysReg as reserved or free after spilling any virtregs. This is very /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. -void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, unsigned NewState) { +bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { + bool displacedAny = false; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { - default: - spillVirtReg(MI, VirtReg); + unsigned Unit = *UI; + switch (unsigned VirtReg = RegUnitStates[Unit]) { + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "datastructures in sync"); + MachineBasicBlock::iterator ReloadBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + reload(ReloadBefore, VirtReg, LRI->PhysReg); + + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + LRI->Reloaded = true; + displacedAny = true; + break; + } + case regPreAssigned: + RegUnitStates[Unit] = regFree; + displacedAny = true; break; case regFree: - case regReserved: break; } } + return displacedAny; +} - markRegUsedInInstr(PhysReg); - setPhysRegState(PhysReg, NewState); +void RegAllocFast::freePhysReg(MCPhysReg PhysReg) { + LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':'); + + unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + switch (unsigned VirtReg = RegUnitStates[FirstUnit]) { + case regFree: + LLVM_DEBUG(dbgs() << '\n'); + return; + case regPreAssigned: + LLVM_DEBUG(dbgs() << '\n'); + setPhysRegState(PhysReg, regFree); + return; + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end()); + LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n'); + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + } + return; + } } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -545,35 +576,61 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, /// disabled - it can be allocated directly. /// \returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { - if (isRegUsedInInstr(PhysReg)) { - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) - << " is already used in instr.\n"); - return spillImpossible; - } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { switch (unsigned VirtReg = RegUnitStates[*UI]) { case regFree: break; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); + case regPreAssigned: + LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned " + << printReg(PhysReg, TRI) << '\n'); return spillImpossible; default: { - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; + bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 || + findLiveVirtReg(VirtReg)->LiveOut; + return SureSpill ? spillClean : spillDirty; } } } return 0; } +void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, + Register VirtReg, MCPhysReg Reg) { + auto UDBGValIter = DanglingDbgValues.find(VirtReg); + if (UDBGValIter == DanglingDbgValues.end()) + return; + + SmallVectorImpl &Dangling = UDBGValIter->second; + for (MachineInstr *DbgValue : Dangling) { + assert(DbgValue->isDebugValue()); + MachineOperand &MO = DbgValue->getOperand(0); + if (!MO.isReg()) + continue; + + // Test whether the physreg survives from the definition to the DBG_VALUE. + MCPhysReg SetToReg = Reg; + unsigned Limit = 20; + for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()), + E = DbgValue->getIterator(); I != E; ++I) { + if (I->modifiesRegister(Reg, TRI) || --Limit == 0) { + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + SetToReg = 0; + break; + } + } + MO.setReg(SetToReg); + if (SetToReg != 0) + MO.setIsRenamable(); + } + Dangling.clear(); +} + /// This method updates local state so that we know that PhysReg is the /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. -void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { +void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, + MCPhysReg PhysReg) { Register VirtReg = LR.VirtReg; LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); @@ -581,6 +638,8 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { assert(PhysReg != 0 && "Trying to assign no register"); LR.PhysReg = PhysReg; setPhysRegState(PhysReg, VirtReg); + + assignDanglingDebugValues(AtMI, VirtReg, PhysReg); } static bool isCoalescable(const MachineInstr &MI) { @@ -624,11 +683,10 @@ Register RegAllocFast::traceCopies(Register VirtReg) const { } /// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, + Register Hint0, bool LookAtPhysRegUses) { const Register VirtReg = LR.VirtReg; - - assert(Register::isVirtualRegister(VirtReg) && - "Can only allocate virtual registers"); + assert(LR.PhysReg == 0); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) @@ -636,41 +694,36 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && - RC.contains(Hint0)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint0); - if (Cost < spillDirty) { + if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) && + !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) { + // Take hint if the register is currently free. + if (isPhysRegFree(Hint0)) { LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) << '\n'); - if (Cost) - definePhysReg(MI, Hint0, regFree); - assignVirtToPhysReg(LR, Hint0); + assignVirtToPhysReg(MI, LR, Hint0); return; } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) - << "occupied\n"); + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI) + << " occupied\n"); } } else { Hint0 = Register(); } + // Try other hint. Register Hint1 = traceCopies(VirtReg); - if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && - RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint1); - if (Cost < spillDirty) { + if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) && + !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) { + // Take hint if the register is currently free. + if (isPhysRegFree(Hint1)) { LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << '\n'); - if (Cost) - definePhysReg(MI, Hint1, regFree); - assignVirtToPhysReg(LR, Hint1); + << '\n'); + assignVirtToPhysReg(MI, LR, Hint1); return; } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << "occupied\n"); + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI) + << " occupied\n"); } } else { Hint1 = Register(); @@ -681,15 +734,20 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); for (MCPhysReg PhysReg : AllocationOrder) { LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); + if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) { + LLVM_DEBUG(dbgs() << "already used in instr.\n"); + continue; + } + unsigned Cost = calcSpillCost(PhysReg); LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); // Immediate take a register with cost 0. if (Cost == 0) { - assignVirtToPhysReg(LR, PhysReg); + assignVirtToPhysReg(MI, LR, PhysReg); return; } - if (PhysReg == Hint1 || PhysReg == Hint0) + if (PhysReg == Hint0 || PhysReg == Hint1) Cost -= spillPrefBonus; if (Cost < BestCost) { @@ -705,13 +763,14 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { MI.emitError("inline assembly requires more registers than available"); else MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AllocationOrder.begin(), regFree); - assignVirtToPhysReg(LR, *AllocationOrder.begin()); + + LR.Error = true; + LR.PhysReg = 0; return; } - definePhysReg(MI, BestReg, regFree); - assignVirtToPhysReg(LR, BestReg); + displacePhysReg(MI, BestReg); + assignVirtToPhysReg(MI, LR, BestReg); } void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { @@ -739,212 +798,166 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { MO.setIsRenamable(true); } -/// Allocates a register for VirtReg and mark it as dirty. -MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, - Register VirtReg, Register Hint) { - assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); +/// Variation of defineVirtReg() with special handling for livethrough regs +/// (tied or earlyclobber) that may interfere with preassigned uses. +void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + if (LRI != LiveVirtRegs.end()) { + MCPhysReg PrevReg = LRI->PhysReg; + if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) { + LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI) + << " (tied/earlyclobber resolution)\n"); + freePhysReg(PrevReg); + LRI->PhysReg = 0; + allocVirtReg(MI, *LRI, 0, true); + MachineBasicBlock::iterator InsertBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to " + << printReg(PrevReg, TRI) << '\n'); + BuildMI(*MBB, InsertBefore, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), PrevReg) + .addReg(LRI->PhysReg, llvm::RegState::Kill); + } + MachineOperand &MO = MI.getOperand(OpNum); + if (MO.getSubReg() && !MO.isUndef()) { + LRI->LastUse = &MI; + } + } + return defineVirtReg(MI, OpNum, VirtReg, true); +} + +/// Allocates a register for VirtReg definition. Typically the register is +/// already assigned from a use of the virtreg, however we still need to +/// perform an allocation if: +/// - It is a dead definition without any uses. +/// - The value is live out and all uses are in different basic blocks. +void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg, bool LookAtPhysRegUses) { + assert(VirtReg.isVirtual() && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - if (!LRI->PhysReg) { - // If there is no hint, peek at the only use of this register. - if ((!Hint || !Hint.isPhysical()) && - MRI->hasOneNonDBGUse(VirtReg)) { - const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); - // It's a copy, use the destination register as a hint. - if (UseMI.isCopyLike()) - Hint = UseMI.getOperand(0).getReg(); + if (New) { + if (!MO.isDead()) { + if (mayLiveOut(VirtReg)) { + LRI->LiveOut = true; + } else { + // It is a dead def without the dead flag; add the flag now. + MO.setIsDead(true); + } } - allocVirtReg(MI, *LRI, Hint); - } else if (LRI->LastUse) { - // Redefining a live register - kill at the last use, unless it is this - // instruction defining VirtReg multiple times. - if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) - addKillFlag(*LRI); } - assert(LRI->PhysReg && "Register not assigned"); - LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; - LRI->Dirty = true; - markRegUsedInInstr(LRI->PhysReg); - return LRI->PhysReg; + if (LRI->PhysReg == 0) + allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses); + else { + assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) && + "TODO: preassign mismatch"); + LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI) + << " use existing assignment to " + << printReg(LRI->PhysReg, TRI) << '\n'); + } + + MCPhysReg PhysReg = LRI->PhysReg; + assert(PhysReg != 0 && "Register not assigned"); + if (LRI->Reloaded || LRI->LiveOut) { + if (!MI.isImplicitDef()) { + MachineBasicBlock::iterator SpillBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: " + << LRI->Reloaded << '\n'); + bool Kill = LRI->LastUse == nullptr; + spill(SpillBefore, VirtReg, PhysReg, Kill); + LRI->LastUse = nullptr; + } + LRI->LiveOut = false; + LRI->Reloaded = false; + } + markRegUsedInInstr(PhysReg); + setPhysReg(MI, MO, PhysReg); } -/// Make sure VirtReg is available in a physreg and return it. -RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - Register VirtReg, - Register Hint) { - assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); +/// Allocates a register for a VirtReg use. +void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg) { + assert(VirtReg.isVirtual() && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - MachineOperand &MO = MI.getOperand(OpNum); - if (!LRI->PhysReg) { - allocVirtReg(MI, *LRI, Hint); - reload(MI, VirtReg, LRI->PhysReg); - } else if (LRI->Dirty) { - if (isLastUseOfLocalReg(MO)) { - LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n'); - if (MO.isUse()) - MO.setIsKill(); - else - MO.setIsDead(); - } else if (MO.isKill()) { - LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n'); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n'); - MO.setIsDead(false); + if (New) { + MachineOperand &MO = MI.getOperand(OpNum); + if (!MO.isKill()) { + if (mayLiveOut(VirtReg)) { + LRI->LiveOut = true; + } else { + // It is a last (killing) use without the kill flag; add the flag now. + MO.setIsKill(true); + } } - } else if (MO.isKill()) { - // We must remove kill flags from uses of reloaded registers because the - // register would be killed immediately, and there might be a second use: - // %foo = OR killed %x, %x - // This would cause a second reload of %x into a different register. - LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n'); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n'); - MO.setIsDead(false); + } else { + assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag"); } - assert(LRI->PhysReg && "Register not assigned"); + + // If necessary allocate a register. + if (LRI->PhysReg == 0) { + assert(!MO.isTied() && "tied op should be allocated"); + Register Hint; + if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) { + Hint = MI.getOperand(0).getReg(); + assert(Hint.isPhysical() && + "Copy destination should already be assigned"); + } + allocVirtReg(MI, *LRI, Hint, false); + if (LRI->Error) { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); + setPhysReg(MI, MO, *AllocationOrder.begin()); + return; + } + } + LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); - return *LRI; + setPhysReg(MI, MO, LRI->PhysReg); } /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This /// may invalidate any operand pointers. Return true if the operand kills its /// register. -bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, +void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg) { - bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); MO.setIsRenamable(true); - return MO.isKill() || Dead; + return; } // Handle subregister index. MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register()); MO.setIsRenamable(true); - MO.setSubReg(0); + // Note: We leave the subreg number around a little longer in case of defs. + // This is so that the register freeing logic in allocateInstruction can still + // recognize this as subregister defs. The code there will clear the number. + if (!MO.isDef()) + MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super // register kill. if (MO.isKill()) { MI.addRegisterKilled(PhysReg, TRI, true); - return true; + return; } // A of a sub-register requires an implicit def of the full // register. - if (MO.isDef() && MO.isUndef()) - MI.addRegisterDefined(PhysReg, TRI); - - return Dead; -} - -// Handles special instruction operand like early clobbers and tied ops when -// there are additional physreg defines. -void RegAllocFast::handleThroughOperands(MachineInstr &MI, - SmallVectorImpl &VirtDead) { - LLVM_DEBUG(dbgs() << "Scanning for through registers:"); - SmallSet ThroughRegs; - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg.isVirtual()) - continue; - if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || - (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { - if (ThroughRegs.insert(Reg).second) - LLVM_DEBUG(dbgs() << ' ' << printReg(Reg)); - } - } - - // If any physreg defines collide with preallocated through registers, - // we must spill and reallocate. - LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) - continue; - markRegUsedInInstr(Reg); - - for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { - if (!ThroughRegs.count(RegUnitStates[*UI])) - continue; - - // Need to spill any aliasing registers. - for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { - definePhysReg(MI, *SI, regFree); - } - } - } - } - - SmallVector PartialDefs; - LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - if (MO.isUse()) { - if (!MO.isTied()) continue; - LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO - << ") is tied to operand " << MI.findTiedOperandIdx(I) - << ".\n"); - LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LR.PhysReg; - setPhysReg(MI, MO, PhysReg); - // Note: we don't update the def operand yet. That would cause the normal - // def-scan to attempt spilling. - } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { - LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n'); - // Reload the register, but don't assign to the operand just yet. - // That would confuse the later phys-def processing pass. - LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); - PartialDefs.push_back(LR.PhysReg); - } - } - - LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - if (!MO.isEarlyClobber()) - continue; - // Note: defineVirtReg may invalidate MO. - MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0); - if (setPhysReg(MI, MI.getOperand(I), PhysReg)) - VirtDead.push_back(Reg); - } - - // Restore UsedInInstr to a state usable for allocating normal virtual uses. - UsedInInstr.clear(); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) - continue; - LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) - << " as used in instr\n"); - markRegUsedInInstr(Reg); + if (MO.isDef() && MO.isUndef()) { + if (MO.isDead()) + MI.addRegisterDead(PhysReg, TRI, true); + else + MI.addRegisterDefined(PhysReg, TRI); } - - // Also mark PartialDefs as used to avoid reallocation. - for (Register PartialDef : PartialDefs) - markRegUsedInInstr(PartialDef); } #ifndef NDEBUG @@ -955,15 +968,21 @@ void RegAllocFast::dumpState() const { switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; - case regReserved: + case regPreAssigned: dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; + case regLiveIn: + llvm_unreachable("Should not have regLiveIn in map"); default: { dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); - if (I->Dirty) - dbgs() << "[D]"; + if (I->LiveOut || I->Reloaded) { + dbgs() << '['; + if (I->LiveOut) dbgs() << 'O'; + if (I->Reloaded) dbgs() << 'R'; + dbgs() << ']'; + } assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } @@ -986,111 +1005,277 @@ void RegAllocFast::dumpState() const { } #endif -void RegAllocFast::allocateInstruction(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // If this is a copy, we may be able to coalesce. - Register CopySrcReg; - Register CopyDstReg; - unsigned CopySrcSub = 0; - unsigned CopyDstSub = 0; - if (MI.isCopy()) { - CopyDstReg = MI.getOperand(0).getReg(); - CopySrcReg = MI.getOperand(1).getReg(); - CopyDstSub = MI.getOperand(0).getSubReg(); - CopySrcSub = MI.getOperand(1).getSubReg(); +/// Count number of defs consumed from each register class by \p Reg +void RegAllocFast::addRegClassDefCounts(std::vector &RegClassDefCounts, + Register Reg) const { + assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); + + if (Reg.isVirtual()) { + const TargetRegisterClass *OpRC = MRI->getRegClass(Reg); + for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); + RCIdx != RCIdxEnd; ++RCIdx) { + const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx); + // FIXME: Consider aliasing sub/super registers. + if (OpRC->hasSubClassEq(IdxRC)) + ++RegClassDefCounts[RCIdx]; + } + + return; } - // Track registers used by instruction. + for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); + RCIdx != RCIdxEnd; ++RCIdx) { + const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx); + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { + if (IdxRC->contains(*Alias)) { + ++RegClassDefCounts[RCIdx]; + break; + } + } + } +} + +void RegAllocFast::allocateInstruction(MachineInstr &MI) { + // The basic algorithm here is: + // 1. Mark registers of def operands as free + // 2. Allocate registers to use operands and place reload instructions for + // registers displaced by the allocation. + // + // However we need to handle some corner cases: + // - pre-assigned defs and uses need to be handled before the other def/use + // operands are processed to avoid the allocation heuristics clashing with + // the pre-assignment. + // - The "free def operands" step has to come last instead of first for tied + // operands and early-clobbers. + UsedInInstr.clear(); - // First scan. - // Mark physreg uses and early clobbers as used. - // Find the end of the virtreg operands - unsigned VirtOpEnd = 0; - bool hasTiedOps = false; - bool hasEarlyClobbers = false; - bool hasPartialRedefs = false; - bool hasPhysDefs = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - // Make sure MRI knows about registers clobbered by regmasks. - if (MO.isRegMask()) { - MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - continue; + // Scan for special cases; Apply pre-assigned register defs to state. + bool HasPhysRegUse = false; + bool HasRegMask = false; + bool HasVRegDef = false; + bool HasDef = false; + bool HasEarlyClobber = false; + bool NeedToAssignLiveThroughs = false; + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (Reg.isVirtual()) { + if (MO.isDef()) { + HasDef = true; + HasVRegDef = true; + if (MO.isEarlyClobber()) { + HasEarlyClobber = true; + NeedToAssignLiveThroughs = true; + } + if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + NeedToAssignLiveThroughs = true; + } + } else if (Reg.isPhysical()) { + if (!MRI->isReserved(Reg)) { + if (MO.isDef()) { + HasDef = true; + bool displacedAny = definePhysReg(MI, Reg); + if (MO.isEarlyClobber()) + HasEarlyClobber = true; + if (!displacedAny) + MO.setIsDead(true); + } + if (MO.readsReg()) + HasPhysRegUse = true; + } + } + } else if (MO.isRegMask()) { + HasRegMask = true; } - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg) continue; - if (Register::isVirtualRegister(Reg)) { - VirtOpEnd = i+1; - if (MO.isUse()) { - hasTiedOps = hasTiedOps || - MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; + } + + // Allocate virtreg defs. + if (HasDef) { + if (HasVRegDef) { + // Special handling for early clobbers, tied operands or subregister defs: + // Compared to "normal" defs these: + // - Must not use a register that is pre-assigned for a use operand. + // - In order to solve tricky inline assembly constraints we change the + // heuristic to figure out a good operand order before doing + // assignments. + if (NeedToAssignLiveThroughs) { + DefOperandIndexes.clear(); + PhysRegUses.clear(); + + // Track number of defs which may consume a register from the class. + std::vector RegClassDefCounts(TRI->getNumRegClasses(), 0); + assert(RegClassDefCounts[0] == 0); + + LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (MO.readsReg()) { + if (Reg.isPhysical()) { + LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) + << '\n'); + markPhysRegUsedInInstr(Reg); + } + } + + if (MO.isDef()) { + if (Reg.isVirtual()) + DefOperandIndexes.push_back(I); + + addRegClassDefCounts(RegClassDefCounts, Reg); + } + } + + llvm::sort(DefOperandIndexes.begin(), DefOperandIndexes.end(), + [&](uint16_t I0, uint16_t I1) { + const MachineOperand &MO0 = MI.getOperand(I0); + const MachineOperand &MO1 = MI.getOperand(I1); + Register Reg0 = MO0.getReg(); + Register Reg1 = MO1.getReg(); + const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0); + const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1); + + // Identify regclass that are easy to use up completely just in this + // instruction. + unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size(); + unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size(); + + bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()]; + bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()]; + if (SmallClass0 > SmallClass1) + return true; + if (SmallClass0 < SmallClass1) + return false; + + // Allocate early clobbers and livethrough operands first. + bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() || + (MO0.getSubReg() == 0 && !MO0.isUndef()); + bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() || + (MO1.getSubReg() == 0 && !MO1.isUndef()); + if (Livethrough0 > Livethrough1) + return true; + if (Livethrough0 < Livethrough1) + return false; + + // Tie-break rule: operand index. + return I0 < I1; + }); + + for (uint16_t OpIdx : DefOperandIndexes) { + MachineOperand &MO = MI.getOperand(OpIdx); + LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); + unsigned Reg = MO.getReg(); + if (MO.isEarlyClobber() || MO.isTied() || + (MO.getSubReg() && !MO.isUndef())) { + defineLiveThroughVirtReg(MI, OpIdx, Reg); + } else { + defineVirtReg(MI, OpIdx, Reg); + } + } } else { - if (MO.isEarlyClobber()) - hasEarlyClobbers = true; - if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) - hasPartialRedefs = true; + // Assign virtual register defs. + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (Reg.isVirtual()) + defineVirtReg(MI, I, Reg); + } } - continue; } - if (!MRI->isAllocatable(Reg)) continue; - if (MO.isUse()) { - usePhysReg(MO); - } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, - (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); - hasEarlyClobbers = true; - } else - hasPhysDefs = true; + + // Free registers occupied by defs. + // Iterate operands in reverse order, so we see the implicit super register + // defs first (we added them earlier in case of ). + for (unsigned I = MI.getNumOperands(); I-- > 0;) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); + continue; + } + + // Do not free tied operands and early clobbers. + if (MO.isTied() || MO.isEarlyClobber()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(Reg.isPhysical()); + if (MRI->isReserved(Reg)) + continue; + freePhysReg(Reg); + unmarkRegUsedInInstr(Reg); + } } - // The instruction may have virtual register operands that must be allocated - // the same register at use-time and def-time: early clobbers and tied - // operands. If there are also physical defs, these registers must avoid - // both physical defs and uses, making them more constrained than normal - // operands. - // Similarly, if there are multiple defs and tied operands, we must make - // sure the same register is allocated to uses and defs. - // We didn't detect inline asm tied operands above, so just make this extra - // pass for all inline asm. - if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { - handleThroughOperands(MI, VirtDead); - // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = Register(); - // Pretend we have early clobbers so the use operands get marked below. - // This is not necessary for the common case of a single tied use. - hasEarlyClobbers = true; + // Displace clobbered registers. + if (HasRegMask) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + // MRI bookkeeping. + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + + // Displace clobbered registers. + const uint32_t *Mask = MO.getRegMask(); + for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(), + LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) { + MCPhysReg PhysReg = LRI->PhysReg; + if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg)) + displacePhysReg(MI, PhysReg); + } + } + } + } + + // Apply pre-assigned register uses to state. + if (HasPhysRegUse) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; + if (MRI->isReserved(Reg)) + continue; + bool displacedAny = usePhysReg(MI, Reg); + if (!displacedAny && !MRI->isReserved(Reg)) + MO.setIsKill(true); + } } - // Second scan. - // Allocate virtreg uses. + // Allocate virtreg uses and insert reloads as necessary. bool HasUndefUse = false; - for (unsigned I = 0; I != VirtOpEnd; ++I) { + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; + if (!MO.isReg() || !MO.isUse()) + continue; Register Reg = MO.getReg(); if (!Reg.isVirtual()) continue; - if (MO.isUse()) { - if (MO.isUndef()) { - HasUndefUse = true; - // There is no need to allocate a register for an undef use. - continue; - } - // Populate MayLiveAcrossBlocks in case the use block is allocated before - // the def block (removing the vreg uses). - mayLiveIn(Reg); - - LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg); - MCPhysReg PhysReg = LR.PhysReg; - CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; - if (setPhysReg(MI, MO, PhysReg)) - killVirtReg(LR); + if (MO.isUndef()) { + HasUndefUse = true; + continue; } + + + // Populate MayLiveAcrossBlocks in case the use block is allocated before + // the def block (removing the vreg uses). + mayLiveIn(Reg); + + + assert(!MO.isInternalRead() && "Bundles not supported"); + assert(MO.readsReg() && "reading use"); + useVirtReg(MI, I, Reg); } // Allocate undef operands. This is a separate step because in a situation @@ -1109,76 +1294,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { } } - // Track registers defined by instruction - early clobbers and tied uses at - // this point. - UsedInInstr.clear(); - if (hasEarlyClobbers) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) + // Free early clobbers. + if (HasEarlyClobber) { + for (unsigned I = MI.getNumOperands(); I-- > 0; ) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) continue; - // Look for physreg defs and tied uses. - if (!MO.isDef() && !MO.isTied()) continue; - markRegUsedInInstr(Reg); - } - } - - unsigned DefOpEnd = MI.getNumOperands(); - if (MI.isCall()) { - // Spill all virtregs before a call. This serves one purpose: If an - // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots. - // Note: although this is appealing to just consider all definitions - // as call-clobbered, this is not correct because some of those - // definitions may be used later on and we do not want to reuse - // those for virtual registers in between. - LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI, /*OnlyLiveOut*/ false); - } - - // Third scan. - // Mark all physreg defs as used before allocating virtreg defs. - for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - Register Reg = MO.getReg(); - - if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg)) - continue; - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - } + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); + continue; + } - // Fourth scan. - // Allocate defs and collect dead defs. - for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - Register Reg = MO.getReg(); + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(Reg.isPhysical() && "should have register assigned"); + + // We sometimes get odd situations like: + // early-clobber %x0 = INSTRUCTION %x0 + // which is semantically questionable as the early-clobber should + // apply before the use. But in practice we consider the use to + // happen before the early clobber now. Don't free the early clobber + // register in this case. + if (MI.readsRegister(Reg, TRI)) + continue; - // We have already dealt with phys regs in the previous scan. - if (Reg.isPhysical()) - continue; - MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); - if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { - VirtDead.push_back(Reg); - CopyDstReg = Register(); // cancel coalescing; - } else - CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; + freePhysReg(Reg); + } } - // Kill dead defs after the scan to ensure that multiple defs of the same - // register are allocated identically. We didn't need to do this for uses - // because we are creating our own kill flags, and they are always at the last - // use. - for (Register VirtReg : VirtDead) - killVirtReg(VirtReg); - VirtDead.clear(); - LLVM_DEBUG(dbgs() << "<< " << MI); - if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { + if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getNumOperands() == 2) { LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n"); Coalesced.push_back(&MI); } @@ -1195,23 +1344,22 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { if (!Register::isVirtualRegister(Reg)) return; + // Already spilled to a stackslot? + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot. + updateDbgValueForSpill(MI, SS); + LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI); + return; + } + // See if this virtual register has already been allocated to a physical // register or spilled to a stack slot. LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { setPhysReg(MI, MO, LRI->PhysReg); } else { - int SS = StackSlotForVirtReg[Reg]; - if (SS != -1) { - // Modify DBG_VALUE now that the value is in a spill slot. - updateDbgValueForSpill(MI, SS); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI); - return; - } - - // We can't allocate a physreg for a DebugValue, sorry! - LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(Register()); + DanglingDbgValues[Reg].push_back(&MI); } // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so @@ -1219,6 +1367,17 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { LiveDbgValueMap[Reg].push_back(&MI); } +#ifndef NDEBUG +bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { + for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != LR.VirtReg) + return false; + } + + return true; +} +#endif + void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); @@ -1226,18 +1385,15 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - MachineBasicBlock::iterator MII = MBB.begin(); - - // Add live-in registers as live. - for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins()) - if (MRI->isAllocatable(LI.PhysReg)) - definePhysReg(MII, LI.PhysReg, regReserved); + for (MachineBasicBlock *Succ : MBB.successors()) { + for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins()) + setPhysRegState(LI.PhysReg, regPreAssigned); + } - VirtDead.clear(); Coalesced.clear(); - // Otherwise, sequentially allocate each instruction in the MBB. - for (MachineInstr &MI : MBB) { + // Traverse block in reverse order allocating instructions one by one. + for (MachineInstr &MI : reverse(MBB)) { LLVM_DEBUG( dbgs() << "\n>> " << MI << "Regs:"; dumpState() @@ -1253,9 +1409,14 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { allocateInstruction(MI); } + LLVM_DEBUG( + dbgs() << "Begin Regs:"; + dumpState() + ); + // Spill all physical registers holding virtual registers now. - LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n"); - spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true); + LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n"); + reloadAtBegin(MBB); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtRegs might refer to the instrs. @@ -1263,6 +1424,20 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { MBB.erase(MI); NumCoalesced += Coalesced.size(); + for (auto &UDBGPair : DanglingDbgValues) { + for (MachineInstr *DbgValue : UDBGPair.second) { + assert(DbgValue->isDebugValue() && "expected DBG_VALUE"); + MachineOperand &MO = DbgValue->getOperand(0); + // Nothing to do if the vreg was spilled in the meantime. + if (!MO.isReg()) + continue; + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + MO.setReg(0); + } + } + DanglingDbgValues.clear(); + LLVM_DEBUG(MBB.dump()); } @@ -1276,8 +1451,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { MFI = &MF.getFrameInfo(); MRI->freezeReservedRegs(MF); RegClassInfo.runOnMachineFunction(MF); + unsigned NumRegUnits = TRI->getNumRegUnits(); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegUnits()); + UsedInInstr.setUniverse(NumRegUnits); + PhysRegUses.clear(); + PhysRegUses.setUniverse(NumRegUnits); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll index 7bcd3474f8e85..436662e3909ec 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll @@ -1,5 +1,5 @@ -;; RUN: llc -mtriple aarch64 -global-isel -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOP -;; RUN: llc -mtriple aarch64 -mattr=+v8.3a -global-isel -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-V83 +;; RUN: llc -mtriple aarch64 -global-isel -O0 %s -o - | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NOP +;; RUN: llc -mtriple aarch64 -mattr=+v8.3a -global-isel -O0 %s -o - | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-V83 declare void @g0() #1 declare void @g1(i8*) #1 declare void @g2(i32, i8*) #1 @@ -18,22 +18,22 @@ entry: ;; CHECK-LABEL: f0: ;; CHECK-NOT: {{(mov|ldr)}} x30 ;; CHECK-NOP: hint #7 -;; CHECK-V83: xpaci x30 +;; CHECK-V83: mov [[COPY_X30:x[0-9]+]], x30 +;; CHECK-V83: xpaci [[COPY_X30]] ;; CHECK: bl g1 ;; CHECK: ldr x[[T0:[0-9]+]], [x29] ;; CHECK-NOP-NEXT: ldr x30, [x[[T0]], #8] ;; CHECK-NOP-NEXT: hint #7 -;; CHECK-V83-NEXT: ldr x[[T0]], [x[[T0]], #8] -;; CHECK-V83-NEXT: xpaci x[[T0]] +;; CHECK-V83-NEXT: ldr x[[LD0:[0-9]+]], [x[[T0]], #8] +;; CHECK-V83-NEXT: xpaci x[[LD0]] ;; CHECK: bl g2 ;; CHECK: ldr x[[T1:[0-9]+]], [x29] ;; CHECK-NEXT: ldr x[[T1]], [x[[T1]]] ;; CHECK-NOP-NEXT: ldr x30, [x[[T1]], #8] ;; CHECK-NOP-NEXT: hint #7 ;; CHECK-NOP-NEXT: mov x0, x30 -;; CHECK-V83-NEXT: ldr x[[T1]], [x[[T1]], #8] -;; CHECK-V83-NEXT: xpaci x[[T1]] -;; CHECK-V83-NEXT: mov x0, x[[T1]] +;; CHECK-V83-NEXT: ldr x0, [x[[T1]], #8] +;; CHECK-V83-NEXT: xpaci x0 define i8* @f1() #0 { entry: @@ -49,23 +49,25 @@ entry: ;; CHECK-NOP-DAG: str x30, [sp, #[[OFF:[0-9]+]] ;; CHECK-NOP: ldr x30, [x[[T0]], #8] ;; CHECK-NOP-NEXT: hint #7 -;; CHECK-V83: ldr x[[T0]], [x[[T0]], #8] -;; CHECK-V83-NEXT: xpaci x[[T0]] -;; CHECK-V83: str x30, [sp, #[[OFF:[0-9]+]] +;; CHECK-V83-DAG: str x30, [sp, #[[OFF:[0-9]+]] +;; CHECK-V83: ldr x[[T1:[0-9]+]], [x[[T0]], #8] +;; CHECK-V83-NEXT: xpaci x[[T1]] + ;; CHECK: bl g1 -;; CHECK: ldr x[[T1:[0-9]+]], [x29] -;; CHECK-NEXT: ldr x[[T1]], [x[[T1]]] -;; CHECK-NOP-NEXT: ldr x30, [x[[T1]], #8] +;; CHECK: ldr x[[T2:[0-9]+]], [x29] +;; CHECK-NEXT: ldr x[[T2]], [x[[T2]]] +;; CHECK-NOP-NEXT: ldr x30, [x[[T2]], #8] ;; CHECK-NOP-NEXT: hint #7 -;; CHECK-V83-NEXT: ldr x[[T1]], [x[[T1]], #8] -;; CHECK-V83-NEXT: xpaci x[[T1]] +;; CHECK-V83-NEXT: ldr x[[T3:[0-9]+]], [x[[T2]], #8] +;; CHECK-V83-NEXT: xpaci x[[T3]] ;; CHECK: bl g2 -;; CHECK: ldr x[[T2:[0-9]+]], [sp, #[[OFF]]] -;; CHECK-NOP-NEXT: mov x30, x[[T2]] + +;; CHECK-NOP: ldr x30, [sp, #[[OFF]]] ;; CHECK-NOP-NEXT: hint #7 ;; CHECK-NOP-NEXT: mov x0, x30 -;; CHECK-V83-NEXT: xpaci x[[T2]] -;; CHECK-V83-NEXT: mov x0, x[[T2]] + +;; CHECK-V83: ldr x0, [sp, #[[OFF]]] +;; CHECK-V83-NEXT: xpaci x0 ;; CHECK-NOT: x0 ;; CHECK: ret @@ -77,12 +79,12 @@ entry: } ;; CHECK-LABEL: f2 ;; CHECK: bl g0 -;; CHECK: ldr x[[T0:[0-9]+]], [sp, -;; CHECK-NOP-NEXT: mov x30, x[[T2]] +;; CHECK-NOP: ldr x30, [sp, ;; CHECK-NOP-NEXT: hint #7 ;; CHECK-NOP-NEXT: mov x0, x30 -;; CHECK-V83-NEXT: xpaci x[[T2]] -;; CHECK-V83-NEXT: mov x0, x[[T2]] + +;; CHECK-V83: ldr x0, [sp, +;; CHECK-V83-NEXT: xpaci x0 ;; CHECK-NOT: x0 ;; CHECK: ret @@ -92,10 +94,12 @@ entry: ret i8* %0 } ;; CHECK-LABEL: f3: -;; CHECK: str x30, [sp, +;; CHECK-NOP: str x30, [sp, ;; CHECK-NOP-NEXT: hint #7 -;; CHECK-V83-NEXT: xpaci x30 -;; CHECK-NEXT: mov x0, x30 +;; CHECK-NOP-NEXT: mov x0, x30 + +;; CHECK-V83: mov x0, x30 +;; CHECK-V83-NEXT: xpaci x0 ;; CHECK-NOT: x0 ;; CHECK: ret attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll index cbeac5d85fc4e..296795b327617 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll @@ -31,9 +31,8 @@ target triple = "arm64-apple-ios13.0.0" ; This test checks that we don't re-use the register for the variable descriptor ; for the second ldr. ; CHECK: adrp x[[PTR1:[0-9]+]], _t_val@TLVPPAGE -; CHECK: ldr x[[PTR1]], [x[[PTR1]], _t_val@TLVPPAGEOFF] -; CHECK: ldr x[[FPTR:[0-9]+]], [x[[PTR1]]] -; CHECK: mov x0, x[[PTR1]] +; CHECK: ldr x0, [x[[PTR1]], _t_val@TLVPPAGEOFF] +; CHECK: ldr x[[FPTR:[0-9]+]], [x0] ; CHECK: blr x[[FPTR]] define void @_Z4funcPKc(i8* %id) { diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 805ba09bace28..d563ccb851ce1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,7 +94,7 @@ entry: store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: tbz w8, #0, LBB4_2 +; CHECK: tbz {{w[0-9]+}}, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll index 6b5799bdefd94..586b7d116f5c8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -79,8 +79,7 @@ declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 define i32 @t2() { entry: ; CHECK-LABEL: t2 -; CHECK: mov [[REG1:x[0-9]+]], xzr -; CHECK: mov x0, [[REG1]] +; CHECK: mov x0, xzr ; CHECK: mov w1, #-8 ; CHECK: mov [[REG2:w[0-9]+]], #1023 ; CHECK: uxth w2, [[REG2]] diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 7c546936ba27a..b3c073f535420 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,9 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s0 -; CHECK: mov w0, [[REG]] +; CHECK: fcvt [[REG:s[0-9]+]], h0 +; CHECK: fcvtzs w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv } @@ -15,9 +14,8 @@ entry: define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], s0 -; CHECK: mov w0, [[REG]] +; CHECK: fcvt [[REG:s[0-9]+]], h0 +; CHECK: fcvtzu w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv } @@ -26,8 +24,8 @@ entry: define half @sitofp_hw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i1 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK: scvtf s0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i1 %a to half ret half %conv @@ -37,8 +35,8 @@ entry: define half @sitofp_hw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i8 -; CHECK: sxtb w8, w0 -; CHECK: scvtf s0, w8 +; CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i8 %a to half ret half %conv @@ -48,8 +46,8 @@ entry: define half @sitofp_hw_i16(i16 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i16 -; CHECK: sxth w8, w0 -; CHECK: scvtf s0, w8 +; CHECK: sxth [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i16 %a to half ret half %conv @@ -79,8 +77,8 @@ entry: define half @uitofp_hw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i1 -; CHECK: and w8, w0, #0x1 -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i1 %a to half ret half %conv @@ -90,8 +88,8 @@ entry: define half @uitofp_hw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i8 -; CHECK: and w8, w0, #0xff -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0xff +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i8 %a to half ret half %conv @@ -101,8 +99,8 @@ entry: define half @uitofp_hw_i16(i16 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i16 -; CHECK: and w8, w0, #0xffff -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0xffff +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i16 %a to half ret half %conv diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index d8abf14c1366b..26ce3a3b94aa8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck -enable-var-scope %s ;; Test various conversions. define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp { @@ -49,13 +49,12 @@ entry: ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldrb w8, [sp, #15] -; CHECK: strh w8, [sp, #12] -; CHECK: ldrh w8, [sp, #12] -; CHECK: str w8, [sp, #8] -; CHECK: ldr w8, [sp, #8] -; CHECK: ; kill: def $x8 killed $w8 -; CHECK: str x8, [sp] +; CHECK: ldrb [[REG0:w[0-9]+]], [sp, #15] +; CHECK: strh [[REG0]], [sp, #12] +; CHECK: ldrh [[REG1:w[0-9]+]], [sp, #12] +; CHECK: str [[REG1]], [sp, #8] +; CHECK: ldr w[[REG2:[0-9]+]], [sp, #8] +; CHECK: str x[[REG2]], [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -105,12 +104,12 @@ entry: ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldrsb w8, [sp, #15] -; CHECK: strh w8, [sp, #12] -; CHECK: ldrsh w8, [sp, #12] -; CHECK: str w8, [sp, #8] -; CHECK: ldrsw x8, [sp, #8] -; CHECK: str x8, [sp] +; CHECK: ldrsb [[REG0:w[0-9]+]], [sp, #15] +; CHECK: strh [[REG0]], [sp, #12] +; CHECK: ldrsh [[REG1:w[0-9]+]], [sp, #12] +; CHECK: str [[REG1]], [sp, #8] +; CHECK: ldrsw [[REG2:x[0-9]+]], [sp, #8] +; CHECK: str [[REG2]], [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -166,8 +165,8 @@ entry: define signext i16 @sext_i1_i16(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sext_i1_i16 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK-NEXT: sxth w0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: sxth w0, [[REG]] %conv = sext i1 %a to i16 ret i16 %conv } @@ -176,8 +175,8 @@ entry: define signext i8 @sext_i1_i8(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sext_i1_i8 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK-NEXT: sxtb w0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: sxtb w0, [[REG]] %conv = sext i1 %a to i8 ret i8 %conv } @@ -240,8 +239,8 @@ entry: define float @sitofp_sw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_sw_i1 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK: scvtf s0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: scvtf s0, [[REG]] %conv = sitofp i1 %a to float ret float %conv } @@ -250,8 +249,8 @@ entry: define float @sitofp_sw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_sw_i8 -; CHECK: sxtb w8, w0 -; CHECK: scvtf s0, w8 +; CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] %conv = sitofp i8 %a to float ret float %conv } @@ -304,8 +303,8 @@ entry: define float @uitofp_sw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_sw_i1 -; CHECK: and w8, w0, #0x1 -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: ucvtf s0, [[REG]] %conv = uitofp i1 %a to float ret float %conv } @@ -374,7 +373,8 @@ entry: define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i16 -; CHECK: and [[REG2:w[0-9]+]], w0, #0xffff +; CHECK: mov x[[TMP:[0-9]+]], x0 +; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xffff{{$}} ; CHECK: uxth w0, [[REG2]] %conv = trunc i64 %a to i16 ret i16 %conv @@ -383,7 +383,8 @@ entry: define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i8 -; CHECK: and [[REG2:w[0-9]+]], w0, #0xff +; CHECK: mov x[[TMP:[0-9]+]], x0 +; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xff{{$}} ; CHECK: uxtb w0, [[REG2]] %conv = trunc i64 %a to i8 ret i8 %conv @@ -392,7 +393,8 @@ entry: define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i1 -; CHECK: and [[REG2:w[0-9]+]], w0, #0x1 +; CHECK: mov x[[TMP:[0-9]+]], x0 +; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0x1{{$}} ; CHECK: and w0, [[REG2]], #0x1 %conv = trunc i64 %a to i1 ret i1 %conv diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index e1e889b906c01..8d35af2737b48 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -210,10 +210,10 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun ; ; FAST-LABEL: test_vcvt_high_f32_f64: ; FAST: // %bb.0: -; FAST-NEXT: // implicit-def: $q2 ; FAST-NEXT: mov.16b v2, v0 -; FAST-NEXT: fcvtn2 v2.4s, v1.2d +; FAST-NEXT: // implicit-def: $q0 ; FAST-NEXT: mov.16b v0, v2 +; FAST-NEXT: fcvtn2 v0.4s, v1.2d ; FAST-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_f32_f64: @@ -249,10 +249,10 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou ; ; FAST-LABEL: test_vcvtx_high_f32_f64: ; FAST: // %bb.0: -; FAST-NEXT: // implicit-def: $q2 ; FAST-NEXT: mov.16b v2, v0 -; FAST-NEXT: fcvtxn2 v2.4s, v1.2d +; FAST-NEXT: // implicit-def: $q0 ; FAST-NEXT: mov.16b v0, v2 +; FAST-NEXT: fcvtxn2 v0.4s, v1.2d ; FAST-NEXT: ret ; ; GISEL-LABEL: test_vcvtx_high_f32_f64: @@ -283,17 +283,12 @@ define i16 @to_half(float %in) { ; ; FAST-LABEL: to_half: ; FAST: // %bb.0: -; FAST-NEXT: sub sp, sp, #16 // =16 -; FAST-NEXT: .cfi_def_cfa_offset 16 -; FAST-NEXT: fcvt h0, s0 +; FAST-NEXT: fcvt h1, s0 ; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s1, w0 -; FAST-NEXT: mov.16b v1, v0 -; FAST-NEXT: fmov w8, s1 -; FAST-NEXT: mov w0, w8 -; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill -; FAST-NEXT: mov w0, w8 -; FAST-NEXT: add sp, sp, #16 // =16 +; FAST-NEXT: fmov s0, w0 +; FAST-NEXT: mov.16b v0, v1 +; FAST-NEXT: fmov w0, s0 +; FAST-NEXT: // kill: def $w1 killed $w0 ; FAST-NEXT: ret ; ; GISEL-LABEL: to_half: diff --git a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll index 0467a2cba8313..3c71ee1ee58cc 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll @@ -17,8 +17,9 @@ declare [2 x i32] @callee() define void @test_struct_return(i32* %addr) { ; CHECK-LABEL: test_struct_return: ; CHECK: bl _callee -; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32 -; CHECK-DAG: str w0 +; CHECK: x[[COPYX0:[0-9]+]], x0 +; CHECK-DAG: lsr [[HI:x[0-9]+]], x[[COPYX0]], #32 +; CHECK-DAG: str w[[COPYX0]] %res = call [2 x i32] @callee() %res.0 = extractvalue [2 x i32] %res, 0 store i32 %res.0, i32* %addr diff --git a/llvm/test/CodeGen/AArch64/arm64_32-null.ll b/llvm/test/CodeGen/AArch64/arm64_32-null.ll index 9d62c56248b5e..6360b6298160f 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32-null.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-null.ll @@ -13,11 +13,12 @@ define void @test_store(i8** %p) { define void @test_phi(i8** %p) { ; CHECK-LABEL: test_phi: ; CHECK: mov [[R1:x[0-9]+]], xzr -; CHECK: str [[R1]], [sp] +; CHECK: str [[R1]], [sp, #8] ; CHECK: b [[BB:LBB[0-9_]+]] ; CHECK: [[BB]]: -; CHECK: ldr x0, [sp] -; CHECK: str w0, [x{{.*}}] +; CHECK: ldr x0, [sp, #8] +; CHECK: mov w8, w0 +; CHECK: str w8, [x{{.*}}] bb0: br label %bb1 diff --git a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll index 46532386783fa..9edf9e6d82df7 100644 --- a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll +++ b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll @@ -64,9 +64,9 @@ bb3: ; OPT: b.gt [[L:\.LBB[0-9_]+]] ; OPT: tbz w1, #0, [[L]] ; +; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]] ; NOOPT: subs w{{[0-9]+}}, w{{[0-9]+}}, #0 ; NOOPT: cset [[R1:w[0-9]+]], gt -; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]] ; NOOPT: str [[R1]], [sp, #[[SLOT1:[0-9]+]]] ; NOOPT: b .LBB ; NOOPT: ldr [[R2:w[0-9]+]], [sp, #[[SLOT1]]] diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll index bfb7b5809f210..43e36dd88209c 100644 --- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll @@ -1,16 +1,16 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxrb [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1, uxtb ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrb [[STATUS]], w2, [x0] +; CHECK: stlxrb [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 +; CHECK: subs {{w[0-9]+}}, [[OLD]], w1, uxtb ; CHECK: cset {{w[0-9]+}}, eq %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic ret { i8, i1 } %res @@ -18,12 +18,12 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxrh [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1, uxth ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [x0] +; CHECK: stlxrh [[STATUS:w[3-9]]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -34,12 +34,12 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxr [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS]], w2, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -50,12 +50,12 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxr [[OLD:x[0-9]+]], [x0] +; CHECK: ldaxr [[OLD:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], x1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS]], x2, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{x[0-9]+}}, [[OLD]], x1 @@ -66,14 +66,15 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_128: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD_LO]], x2 ; CHECK: cset [[CMP_TMP:w[0-9]+]], ne ; CHECK: cmp [[OLD_HI]], x3 ; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne ; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0] +; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic @@ -86,17 +87,18 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nou @var128 = global i128 0 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) { ; CHECK-LABEL: test_cmpxchg_128_unsplit: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK: ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]] ; CHECK: ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]] ; CHECK: cset [[CMP_TMP:w[0-9]+]], ne ; CHECK: cmp [[OLD_HI]], [[DESIRED_HI]] ; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne ; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [x0] +; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll index 22a71f5701f12..c94751d77982f 100644 --- a/llvm/test/CodeGen/AArch64/combine-loads.ll +++ b/llvm/test/CodeGen/AArch64/combine-loads.ll @@ -6,10 +6,10 @@ define <2 x i64> @z(i64* nocapture nonnull readonly %p) { ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x0, #8] -; CHECK-NEXT: mov v0.d[0], x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: mov v0.d[0], x9 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %b = load i64, i64* %p %p2 = getelementptr i64, i64* %p, i64 1 diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll index f03955c4dcd3e..82e3c2d4d61a8 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll @@ -1,20 +1,19 @@ ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: cmpxchg_monotonic_32: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], w1 +; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], w2, [x0] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], w1 -; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset [[STATUS]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov w0, [[OLD]] define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 { %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic %tmp1 = extractvalue { i32, i1 } %tmp0, 0 @@ -26,21 +25,20 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 { ; CHECK-LABEL: cmpxchg_acq_rel_32_load: ; CHECK: // %bb.0: -; CHECK: ldr [[NEW:w[0-9]+]], [x2] +; CHECK: mov [[ADDR:x[0-9]+]], x0 +; CHECK: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], w1 +; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], w1 -; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset [[STATUS]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov w0, [[OLD]] define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 { %new = load i32, i32* %pnew %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire @@ -52,20 +50,19 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 } ; CHECK-LABEL: cmpxchg_seq_cst_64: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], x1 +; CHECK-NEXT: ldaxr x0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], x2, [x0] +; CHECK-NEXT: stlxr [[STATUS]], x2, {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], x1 +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov x0, [[OLD]] define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 { %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst %tmp1 = extractvalue { i64, i1 } %tmp0, 0 diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll index 105969717e46b..2e5e988f0576c 100644 --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -6,15 +6,15 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount128: ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret Entry: %1 = load i128, i128* %0, align 16 @@ -32,24 +32,24 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: ldr x8, [x0, #8] ; CHECK-NEXT: ldr x9, [x0, #24] -; CHECK-NEXT: ldr d0, [x0, #16] -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: ldr d1, [x0, #16] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ret Entry: @@ -69,10 +69,10 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: cnt v0.16b, v0.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: // kill: def $x0 killed $w0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x1, v0.d[1] diff --git a/llvm/test/CodeGen/AArch64/swift-return.ll b/llvm/test/CodeGen/AArch64/swift-return.ll index 2bf5e379b379e..2036faf39bdd0 100644 --- a/llvm/test/CodeGen/AArch64/swift-return.ll +++ b/llvm/test/CodeGen/AArch64/swift-return.ll @@ -203,10 +203,10 @@ declare swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6() ; CHECK-DAG: mov w3, w0 ; CHECK: ret ; CHECK-O0-LABEL: _gen7 -; CHECK-O0: str w0, [sp, #12] -; CHECK-O0: ldr w1, [sp, #12] -; CHECK-O0: ldr w2, [sp, #12] -; CHECK-O0: ldr w3, [sp, #12] +; CHECK-O0: mov w3, w0 +; CHECK-O0: mov w0, w3 +; CHECK-O0: mov w1, w3 +; CHECK-O0: mov w2, w3 define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 @@ -221,10 +221,10 @@ define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { ; CHECK: mov w3, w0 ; CHECK: ret ; CHECK-O0-LABEL: _gen9 -; CHECK-O0: str w0, [sp, #12] -; CHECK-O0: ldr w1, [sp, #12] -; CHECK-O0: ldr w2, [sp, #12] -; CHECK-O0: ldr w3, [sp, #12] +; CHECK-O0: mov w3, w0 +; CHECK-O0: mov w0, w3 +; CHECK-O0: mov w1, w3 +; CHECK-O0: mov w2, w3 define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll index a8635f682ff10..e219ef770f934 100644 --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -21,11 +21,10 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-O0-LABEL: foo: ; CHECK-O0: mov w{{.*}}, #16 ; CHECK-O0: malloc -; CHECK-O0: mov x1, x0 -; CHECK-O0-NOT: x1 +; CHECK-O0: mov x21, x0 +; CHECK-O0-NOT: x21 ; CHECK-O0: mov [[ID:w[0-9]+]], #1 ; CHECK-O0: strb [[ID]], [x0, #8] -; CHECK-O0: mov x21, x1 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -138,14 +137,12 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { ; CHECK-O0: cbz w0 ; CHECK-O0: mov w{{.*}}, #16 ; CHECK-O0: malloc -; CHECK-O0: mov [[ID:x[0-9]+]], x0 +; CHECK-O0: mov x21, x0 ; CHECK-O0: mov [[ID2:w[0-9]+]], #1 ; CHECK-O0: strb [[ID2]], [x0, #8] -; CHECK-O0: mov x21, [[ID]] ; CHECK-O0: ret ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]] -; CHECK-O0: mov x21, [[ID3]] +; CHECK-O0: ldr x21, [sp, [[SLOT]]] ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -179,10 +176,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0-AARCH64-LABEL: foo_loop: ; spill x21 -; CHECK-O0-AARCH64: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0-AARCH64: stur x21, [x29, [[SLOT:#-[0-9]+]]] ; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]] ; CHECK-O0-AARCH64: [[BB1]]: -; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT]]] +; CHECK-O0-AARCH64: ldur x0, [x29, [[SLOT]]] ; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2:#[0-9]+]]] ; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] ; CHECK-O0-AARCH64: mov w{{.*}}, #16 @@ -194,11 +191,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0-AARCH64:[[BB2]]: ; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT2]]] ; CHECK-O0-AARCH64: fcmp -; CHECK-O0-AARCH64: str x0, [sp] +; CHECK-O0-AARCH64: stur x0, [x29, [[SLOT]]] ; CHECK-O0-AARCH64: b.le [[BB1]] ; reload from stack -; CHECK-O0-AARCH64: ldr [[ID3:x[0-9]+]], [sp] -; CHECK-O0-AARCH64: mov x21, [[ID3]] +; CHECK-O0-AARCH64: ldr x21, [sp] ; CHECK-O0-AARCH64: ret ; CHECK-O0-ARM64_32-LABEL: foo_loop: @@ -215,14 +211,12 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0-ARM64_32: strb w{{.*}}, ; CHECK-O0-ARM64_32:[[BB2]]: ; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT2]]] -; CHECK-O0-ARM64_32: fcmp ; CHECK-O0-ARM64_32: str x0, [sp[[OFFSET:.*]]] +; CHECK-O0-ARM64_32: fcmp ; CHECK-O0-ARM64_32: b.le [[BB1]] ; reload from stack -; CHECK-O0-ARM64_32: ldr [[ID3:x[0-9]+]], [sp[[OFFSET]]] -; CHECK-O0-ARM64_32: mov x21, [[ID3]] +; CHECK-O0-ARM64_32: ldr x21, [sp[[OFFSET]]] ; CHECK-O0-ARM64_32: ret - entry: br label %bb_loop @@ -261,16 +255,16 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK-APPLE-NOT: x21 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: mov w{{.*}}, #16 ; spill x8 ; CHECK-O0-DAG: str x8 +; CHECK-O0: mov w{{.*}}, #16 ; CHECK-O0: malloc +; CHECK-O0: mov x10, x0 +; CHECK-O0: mov x21, x10 ; CHECK-O0: mov [[ID:w[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [x0, #8] +; CHECK-O0: strb [[ID]], [x10, #8] ; reload from stack -; CHECK-O0: ldr [[SRET:x[0-9]+]] -; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4] -; CHECK-O0: mov x21 +; CHECK-O0: str w{{.*}}, [x8, #4] ; CHECK-O0-NOT: x21 entry: %call = call i8* @malloc(i64 16) @@ -299,7 +293,7 @@ define float @caller3(i8* %error_ref) { ; CHECK-O0-LABEL: caller3: ; spill x0 -; CHECK-O0: str x0 +; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]] ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:x[0-9]+]], x21 @@ -307,8 +301,8 @@ define float @caller3(i8* %error_ref) { ; CHECK-O0-ARM64_32: cmp x21, #0 ; Access part of the error object and save it to error_ref ; reload from stack +; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]] ; CHECK-O0: ldrb [[CODE:w[0-9]+]] -; CHECK-O0: ldr [[ID:x[0-9]+]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK-O0: bl {{.*}}free entry: @@ -630,11 +624,10 @@ declare swiftcc void @foo2(%swift_error** swifterror) ; Make sure we properly assign registers during fast-isel. ; CHECK-O0-LABEL: testAssign -; CHECK-O0: mov [[TMP:x.*]], xzr -; CHECK-O0: mov x21, [[TMP]] +; CHECK-O0: mov x21, xzr ; CHECK-O0: bl _foo2 ; CHECK-O0: str x21, [s[[STK:.*]]] -; CHECK-O0: ldr x0, [s[[STK]]] +; CHECK-O0: ldr x{{[0-9]+}}, [s[[STK]]] ; CHECK-APPLE-LABEL: testAssign ; CHECK-APPLE: mov x21, xzr diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir index aacc3c6542c77..7642c826acff5 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir +++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir @@ -81,14 +81,14 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $b21, -240 ; CHECK: frame-setup CFI_INSTRUCTION offset $b22, -256 ; CHECK: frame-setup CFI_INSTRUCTION offset $b23, -272 + ; CHECK: STRQui $q0, $sp, 0 :: (store 16 into %stack.1) ; CHECK: EH_LABEL - ; CHECK: STRQui $q0, $sp, 1 :: (store 16 into %stack.0) - ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit killed $q0, implicit-def $q0 + ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $q0 + ; CHECK: STRQui killed $q0, $sp, 1 :: (store 16 into %stack.0) ; CHECK: EH_LABEL - ; CHECK: STRQui killed $q0, $sp, 0 :: (store 16 into %stack.1) ; CHECK: B %bb.1 ; CHECK: bb.1..Lcontinue: - ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1) + ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0) ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) @@ -103,7 +103,7 @@ body: | ; CHECK: bb.2..Lunwind (landing-pad): ; CHECK: liveins: $x0, $x1 ; CHECK: EH_LABEL - ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0) + ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1) ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll index 68fec08255428..33bbdfaa2cfd1 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -50,14 +50,14 @@ define @invoke_callee_may_throw_sve( %v) pe ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: bl may_throw_sve ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_1: // %.Lcontinue -; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -92,7 +92,7 @@ define @invoke_callee_may_throw_sve( %v) pe ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %.Lunwind ; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -172,14 +172,14 @@ define @invoke_callee_may_throw_sve( %v) pe ; GISEL-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp0: -; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; GISEL-NEXT: bl may_throw_sve ; GISEL-NEXT: .Ltmp1: -; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; GISEL-NEXT: b .LBB0_1 ; GISEL-NEXT: .LBB0_1: // %.Lcontinue -; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -214,7 +214,7 @@ define @invoke_callee_may_throw_sve( %v) pe ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB0_2: // %.Lunwind ; GISEL-NEXT: .Ltmp2: -; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -293,14 +293,14 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; CHECK-NEXT: .cfi_offset b21, -240 ; CHECK-NEXT: .cfi_offset b22, -256 ; CHECK-NEXT: .cfi_offset b23, -272 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .Ltmp3: -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl may_throw_neon ; CHECK-NEXT: .Ltmp4: -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: b .LBB1_1 ; CHECK-NEXT: .LBB1_1: // %.Lcontinue -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -314,7 +314,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %.Lunwind ; CHECK-NEXT: .Ltmp5: -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -360,13 +360,13 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; GISEL-NEXT: .cfi_offset b21, -240 ; GISEL-NEXT: .cfi_offset b22, -256 ; GISEL-NEXT: .cfi_offset b23, -272 +; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp3: -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; GISEL-NEXT: bl may_throw_neon +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp4: -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: // %bb.1: // %.Lcontinue -; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -380,7 +380,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB1_2: // %.Lunwind ; GISEL-NEXT: .Ltmp5: -; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll index 587f808bc55e7..6515d25f74157 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll @@ -28,15 +28,15 @@ define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_mov_b32 s4, 7 +; CHECK-NEXT: s_mov_b32 s5, 7 ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_mov_b32 s5, 8 +; CHECK-NEXT: s_mov_b32 s4, 8 ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_add_u32 s5, s4, s5 +; CHECK-NEXT: s_add_u32 s4, s5, s4 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s5 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 6da332a596fb0..22a4fc98b436d 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -17,29 +17,28 @@ ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] -; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 -; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec -; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] - ; Spill load ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 ; Spill saved exec +; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1 -; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:20 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} ; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: ; %bb.{{[0-9]+}}: ; %if +; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] -; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -53,9 +52,7 @@ ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] - - -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:20 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 @@ -88,29 +85,26 @@ endif: ; VGPR: workitem_private_segment_byte_size = 16{{$}} ; GCN: {{^}}; %bb.0: - -; GCN: s_mov_b32 m0, -1 -; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] - -; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 - -; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec -; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] +; GCN-DAG: s_mov_b32 m0, -1 +; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}} +; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]] +; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0 ; Spill load ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] - ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1 -; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:24 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} +; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] +; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} ; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]] @@ -127,7 +121,7 @@ endif: ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:24 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 @@ -139,7 +133,7 @@ endif: define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %load0 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, i32 addrspace(3)* null %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %loop, label %end @@ -161,8 +155,12 @@ end: ; GCN-LABEL: {{^}}divergent_if_else_endif: ; GCN: {{^}}; %bb.0: -; GCN: s_mov_b32 m0, -1 -; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] +; GCN-DAG: s_mov_b32 m0, -1 +; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}} +; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]] + +; Spill load +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0 ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], [[ZERO]], v0 @@ -171,9 +169,6 @@ end: ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} -; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill - ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] @@ -192,7 +187,6 @@ end: ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] - ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[SAVEEXEC_OFFSET]] ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0 @@ -219,8 +213,8 @@ end: ; GCN: ; %bb.{{[0-9]+}}: ; %if -; GCN: ds_read_b32 ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload +; GCN: ds_read_b32 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] @@ -248,7 +242,7 @@ end: define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %load0 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, i32 addrspace(3)* null %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %if, label %else diff --git a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir new file mode 100644 index 0000000000000..d50de439d47d5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir @@ -0,0 +1,62 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s + +# Make sure incorrect kills aren't emitted on vcc + +--- +name: foo +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: foo + ; CHECK: liveins: $vgpr0 + ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: $sgpr4_sgpr5 = COPY $vcc + ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $vcc, implicit $exec + ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed $sgpr4_sgpr5 + %0:vgpr_32 = COPY $vgpr0 + V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec + $sgpr4_sgpr5 = COPY $vcc + %1:sreg_64_xexec = COPY $vcc + %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5 + +... + +# This would hit "Unexpected reg unit state" assert. +--- +name: bar +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: bar + ; CHECK: liveins: $vgpr0 + ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec + ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc + %0:vgpr_32 = COPY $vgpr0 + V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec + %3:sreg_64_xexec = COPY $vcc + %1:sreg_64_xexec = COPY $vcc + %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0, implicit $vgpr0, implicit %3 + +... diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir new file mode 100644 index 0000000000000..bf32ebaf473d8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s + +# This would hit "Illegal subregister index for physical register" verifier error since +# tied operands would skip dropping the subregister index. + +--- +name: invalid_subreg_index +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; CHECK-LABEL: name: invalid_subreg_index + ; CHECK: liveins: $vgpr0, $sgpr0 + ; CHECK: $m0 = COPY renamable $sgpr0 + ; CHECK: undef renamable $vgpr1 = V_INTERP_P2_F32 undef $vgpr1, undef $vgpr0, 0, 1, implicit $mode, implicit $m0, implicit $exec, implicit-def dead $vgpr0_vgpr1 + ; CHECK: S_ENDPGM 0, implicit killed renamable $sgpr0 + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + undef %2.sub1:vreg_64 = V_INTERP_P2_F32 undef %2.sub1, undef %0:vgpr_32, 0, 1, implicit $mode, implicit $m0, implicit $exec + S_ENDPGM 0, implicit %1 + +... diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir index 32de262837816..7e70eb3a952c5 100644 --- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir +++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir @@ -18,7 +18,7 @@ body: | ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -53,9 +53,10 @@ body: | ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -92,9 +93,10 @@ body: | ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -128,9 +130,9 @@ body: | ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec - ; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -164,9 +166,8 @@ body: | ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3 - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec - ; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5) + ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3 + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index 3d3b511ab34b7..c6ba507068120 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -12,101 +12,96 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: bb.0.entry: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN: renamable $sgpr2 = COPY renamable $sgpr1 + ; GCN: renamable $sgpr6 = COPY renamable $sgpr1 ; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 - ; GCN: renamable $sgpr1 = S_MOV_B32 61440 - ; GCN: renamable $sgpr3 = S_MOV_B32 -1 - ; GCN: undef renamable $sgpr4 = COPY killed renamable $sgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN: renamable $sgpr5 = COPY killed renamable $sgpr2 - ; GCN: renamable $sgpr6 = COPY killed renamable $sgpr3 - ; GCN: renamable $sgpr7 = COPY killed renamable $sgpr1 + ; GCN: renamable $sgpr4 = S_MOV_B32 61440 + ; GCN: renamable $sgpr5 = S_MOV_B32 -1 + ; GCN: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: renamable $sgpr1 = COPY killed renamable $sgpr6 + ; GCN: renamable $sgpr2 = COPY killed renamable $sgpr5 + ; GCN: renamable $sgpr3 = COPY killed renamable $sgpr4 + ; GCN: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.2, align 4, addrspace 5) ; GCN: renamable $sgpr0 = S_MOV_B32 16 ; GCN: renamable $sgpr1 = S_MOV_B32 15 ; GCN: renamable $sgpr2 = S_MOV_B32 14 ; GCN: renamable $sgpr3 = S_MOV_B32 13 - ; GCN: renamable $sgpr8 = S_MOV_B32 12 - ; GCN: renamable $sgpr9 = S_MOV_B32 11 - ; GCN: renamable $sgpr10 = S_MOV_B32 10 - ; GCN: renamable $sgpr11 = S_MOV_B32 9 - ; GCN: renamable $sgpr12 = S_MOV_B32 8 - ; GCN: renamable $sgpr13 = S_MOV_B32 7 - ; GCN: renamable $sgpr14 = S_MOV_B32 6 - ; GCN: renamable $sgpr15 = S_MOV_B32 5 - ; GCN: renamable $sgpr16 = S_MOV_B32 3 - ; GCN: renamable $sgpr17 = S_MOV_B32 2 - ; GCN: renamable $sgpr18 = S_MOV_B32 1 - ; GCN: renamable $sgpr19 = S_MOV_B32 0 - ; GCN: renamable $vgpr1 = COPY killed renamable $sgpr19 - ; GCN: renamable $vgpr2 = COPY killed renamable $sgpr18 - ; GCN: renamable $vgpr3 = COPY killed renamable $sgpr17 - ; GCN: renamable $vgpr4 = COPY killed renamable $sgpr16 - ; GCN: renamable $vgpr5 = COPY killed renamable $sgpr15 - ; GCN: renamable $vgpr6 = COPY killed renamable $sgpr14 - ; GCN: renamable $vgpr7 = COPY killed renamable $sgpr13 - ; GCN: renamable $vgpr8 = COPY killed renamable $sgpr12 - ; GCN: renamable $vgpr9 = COPY killed renamable $sgpr11 - ; GCN: renamable $vgpr10 = COPY killed renamable $sgpr10 - ; GCN: renamable $vgpr11 = COPY killed renamable $sgpr9 - ; GCN: renamable $vgpr12 = COPY killed renamable $sgpr8 - ; GCN: renamable $vgpr13 = COPY killed renamable $sgpr3 - ; GCN: renamable $vgpr14 = COPY killed renamable $sgpr2 - ; GCN: renamable $vgpr15 = COPY killed renamable $sgpr1 + ; GCN: renamable $sgpr4 = S_MOV_B32 12 + ; GCN: renamable $sgpr5 = S_MOV_B32 11 + ; GCN: renamable $sgpr6 = S_MOV_B32 10 + ; GCN: renamable $sgpr7 = S_MOV_B32 9 + ; GCN: renamable $sgpr8 = S_MOV_B32 8 + ; GCN: renamable $sgpr9 = S_MOV_B32 7 + ; GCN: renamable $sgpr10 = S_MOV_B32 6 + ; GCN: renamable $sgpr11 = S_MOV_B32 5 + ; GCN: renamable $sgpr12 = S_MOV_B32 3 + ; GCN: renamable $sgpr13 = S_MOV_B32 2 + ; GCN: renamable $sgpr14 = S_MOV_B32 1 + ; GCN: renamable $sgpr15 = S_MOV_B32 0 + ; GCN: renamable $vgpr0 = COPY killed renamable $sgpr15 + ; GCN: renamable $vgpr30 = COPY killed renamable $sgpr14 + ; GCN: renamable $vgpr29 = COPY killed renamable $sgpr13 + ; GCN: renamable $vgpr28 = COPY killed renamable $sgpr12 + ; GCN: renamable $vgpr27 = COPY killed renamable $sgpr11 + ; GCN: renamable $vgpr26 = COPY killed renamable $sgpr10 + ; GCN: renamable $vgpr25 = COPY killed renamable $sgpr9 + ; GCN: renamable $vgpr24 = COPY killed renamable $sgpr8 + ; GCN: renamable $vgpr23 = COPY killed renamable $sgpr7 + ; GCN: renamable $vgpr22 = COPY killed renamable $sgpr6 + ; GCN: renamable $vgpr21 = COPY killed renamable $sgpr5 + ; GCN: renamable $vgpr20 = COPY killed renamable $sgpr4 + ; GCN: renamable $vgpr19 = COPY killed renamable $sgpr3 + ; GCN: renamable $vgpr18 = COPY killed renamable $sgpr2 + ; GCN: renamable $vgpr17 = COPY killed renamable $sgpr1 ; GCN: renamable $vgpr16 = COPY killed renamable $sgpr0 - ; GCN: undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 - ; GCN: renamable $vgpr18 = COPY killed renamable $vgpr2 - ; GCN: renamable $vgpr19 = COPY killed renamable $vgpr3 - ; GCN: renamable $vgpr20 = COPY killed renamable $vgpr4 - ; GCN: renamable $vgpr21 = COPY killed renamable $vgpr5 - ; GCN: renamable $vgpr22 = COPY killed renamable $vgpr6 - ; GCN: renamable $vgpr23 = COPY killed renamable $vgpr7 - ; GCN: renamable $vgpr24 = COPY killed renamable $vgpr8 - ; GCN: renamable $vgpr25 = COPY killed renamable $vgpr9 - ; GCN: renamable $vgpr26 = COPY killed renamable $vgpr10 - ; GCN: renamable $vgpr27 = COPY killed renamable $vgpr11 - ; GCN: renamable $vgpr28 = COPY killed renamable $vgpr12 - ; GCN: renamable $vgpr29 = COPY killed renamable $vgpr13 - ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 - ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 - ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 + ; GCN: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN: renamable $vgpr1 = COPY killed renamable $vgpr30 + ; GCN: renamable $vgpr2 = COPY killed renamable $vgpr29 + ; GCN: renamable $vgpr3 = COPY killed renamable $vgpr28 + ; GCN: renamable $vgpr4 = COPY killed renamable $vgpr27 + ; GCN: renamable $vgpr5 = COPY killed renamable $vgpr26 + ; GCN: renamable $vgpr6 = COPY killed renamable $vgpr25 + ; GCN: renamable $vgpr7 = COPY killed renamable $vgpr24 + ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr23 + ; GCN: renamable $vgpr9 = COPY killed renamable $vgpr22 + ; GCN: renamable $vgpr10 = COPY killed renamable $vgpr21 + ; GCN: renamable $vgpr11 = COPY killed renamable $vgpr20 + ; GCN: renamable $vgpr12 = COPY killed renamable $vgpr19 + ; GCN: renamable $vgpr13 = COPY killed renamable $vgpr18 + ; GCN: renamable $vgpr14 = COPY killed renamable $vgpr17 + ; GCN: renamable $vgpr15 = COPY killed renamable $vgpr16 + ; GCN: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.1, align 4, addrspace 5) ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec - ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) - ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr0 = IMPLICIT_DEF + ; GCN: renamable $sgpr0_sgpr1 = IMPLICIT_DEF ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) - ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) - ; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec - ; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec - ; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.4, align 4, addrspace 5) + ; GCN: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5) + ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.1, align 4, addrspace 5) + ; GCN: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec + ; GCN: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec + ; GCN: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit-def undef $mode, implicit $m0, implicit $mode - ; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5) - ; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0 + ; GCN: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 - ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) - ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5) + ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc ; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GCN: bb.3: ; GCN: successors: %bb.2(0x80000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.3, align 4, addrspace 5) - ; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1 + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1 ; GCN: bb.2: - ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.1, align 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) + ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5) + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) ; GCN: S_ENDPGM 0 entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index 230cd8eb5b0d5..4dfc9bce69aae 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -236,17 +236,18 @@ entry: ; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s{{[0-9]+}} ; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s{{[0-9]+}} ; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill +; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill -; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 +; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]] +; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]] +; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]] ; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] @@ -255,37 +256,37 @@ entry: ; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}} ; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]] ; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill ; W64-O0: s_xor_b64 exec, exec, [[SAVE]] ; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB0]] -; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]] + +; XXX-W64-O0: s_mov_b64 exec, [[SAVEEXEC]] ; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill ; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]] -; W64-O0: ; %bb.{{[0-9]+}}: +; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1 +; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill ; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec -; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill ; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]] -; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] +; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] -; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 +; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]] +; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]] +; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]] ; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] @@ -294,12 +295,10 @@ entry: ; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}} ; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]] ; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index b119ffd303e08..dccee0a298a30 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -15,381 +15,379 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[12:19] +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 8 +; GCN-NEXT: v_writelane_b32 v0, s5, 9 +; GCN-NEXT: v_writelane_b32 v0, s6, 10 +; GCN-NEXT: v_writelane_b32 v0, s7, 11 +; GCN-NEXT: v_writelane_b32 v0, s8, 12 +; GCN-NEXT: v_writelane_b32 v0, s9, 13 +; GCN-NEXT: v_writelane_b32 v0, s10, 14 +; GCN-NEXT: v_writelane_b32 v0, s11, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 24 +; GCN-NEXT: v_writelane_b32 v0, s5, 25 +; GCN-NEXT: v_writelane_b32 v0, s6, 26 +; GCN-NEXT: v_writelane_b32 v0, s7, 27 +; GCN-NEXT: v_writelane_b32 v0, s8, 28 +; GCN-NEXT: v_writelane_b32 v0, s9, 29 +; GCN-NEXT: v_writelane_b32 v0, s10, 30 +; GCN-NEXT: v_writelane_b32 v0, s11, 31 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[20:27] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 32 +; GCN-NEXT: v_writelane_b32 v0, s5, 33 +; GCN-NEXT: v_writelane_b32 v0, s6, 34 +; GCN-NEXT: v_writelane_b32 v0, s7, 35 +; GCN-NEXT: v_writelane_b32 v0, s8, 36 +; GCN-NEXT: v_writelane_b32 v0, s9, 37 +; GCN-NEXT: v_writelane_b32 v0, s10, 38 +; GCN-NEXT: v_writelane_b32 v0, s11, 39 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[36:43] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 40 +; GCN-NEXT: v_writelane_b32 v0, s5, 41 +; GCN-NEXT: v_writelane_b32 v0, s6, 42 +; GCN-NEXT: v_writelane_b32 v0, s7, 43 +; GCN-NEXT: v_writelane_b32 v0, s8, 44 +; GCN-NEXT: v_writelane_b32 v0, s9, 45 +; GCN-NEXT: v_writelane_b32 v0, s10, 46 +; GCN-NEXT: v_writelane_b32 v0, s11, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[44:51] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[52:59] +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 56 +; GCN-NEXT: v_writelane_b32 v0, s5, 57 +; GCN-NEXT: v_writelane_b32 v0, s6, 58 +; GCN-NEXT: v_writelane_b32 v0, s7, 59 +; GCN-NEXT: v_writelane_b32 v0, s8, 60 +; GCN-NEXT: v_writelane_b32 v0, s9, 61 +; GCN-NEXT: v_writelane_b32 v0, s10, 62 +; GCN-NEXT: v_writelane_b32 v0, s11, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[60:67] +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 8 +; GCN-NEXT: v_writelane_b32 v1, s5, 9 +; GCN-NEXT: v_writelane_b32 v1, s6, 10 +; GCN-NEXT: v_writelane_b32 v1, s7, 11 +; GCN-NEXT: v_writelane_b32 v1, s8, 12 +; GCN-NEXT: v_writelane_b32 v1, s9, 13 +; GCN-NEXT: v_writelane_b32 v1, s10, 14 +; GCN-NEXT: v_writelane_b32 v1, s11, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 16 +; GCN-NEXT: v_writelane_b32 v1, s5, 17 +; GCN-NEXT: v_writelane_b32 v1, s6, 18 +; GCN-NEXT: v_writelane_b32 v1, s7, 19 +; GCN-NEXT: v_writelane_b32 v1, s8, 20 +; GCN-NEXT: v_writelane_b32 v1, s9, 21 +; GCN-NEXT: v_writelane_b32 v1, s10, 22 +; GCN-NEXT: v_writelane_b32 v1, s11, 23 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 24 +; GCN-NEXT: v_writelane_b32 v1, s5, 25 +; GCN-NEXT: v_writelane_b32 v1, s6, 26 +; GCN-NEXT: v_writelane_b32 v1, s7, 27 +; GCN-NEXT: v_writelane_b32 v1, s8, 28 +; GCN-NEXT: v_writelane_b32 v1, s9, 29 +; GCN-NEXT: v_writelane_b32 v1, s10, 30 +; GCN-NEXT: v_writelane_b32 v1, s11, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 32 +; GCN-NEXT: v_writelane_b32 v1, s5, 33 +; GCN-NEXT: v_writelane_b32 v1, s6, 34 +; GCN-NEXT: v_writelane_b32 v1, s7, 35 +; GCN-NEXT: v_writelane_b32 v1, s8, 36 +; GCN-NEXT: v_writelane_b32 v1, s9, 37 +; GCN-NEXT: v_writelane_b32 v1, s10, 38 +; GCN-NEXT: v_writelane_b32 v1, s11, 39 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 40 +; GCN-NEXT: v_writelane_b32 v1, s5, 41 +; GCN-NEXT: v_writelane_b32 v1, s6, 42 +; GCN-NEXT: v_writelane_b32 v1, s7, 43 +; GCN-NEXT: v_writelane_b32 v1, s8, 44 +; GCN-NEXT: v_writelane_b32 v1, s9, 45 +; GCN-NEXT: v_writelane_b32 v1, s10, 46 +; GCN-NEXT: v_writelane_b32 v1, s11, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[68:75] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 48 +; GCN-NEXT: v_writelane_b32 v1, s5, 49 +; GCN-NEXT: v_writelane_b32 v1, s6, 50 +; GCN-NEXT: v_writelane_b32 v1, s7, 51 +; GCN-NEXT: v_writelane_b32 v1, s8, 52 +; GCN-NEXT: v_writelane_b32 v1, s9, 53 +; GCN-NEXT: v_writelane_b32 v1, s10, 54 +; GCN-NEXT: v_writelane_b32 v1, s11, 55 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[76:83] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 56 +; GCN-NEXT: v_writelane_b32 v1, s5, 57 +; GCN-NEXT: v_writelane_b32 v1, s6, 58 +; GCN-NEXT: v_writelane_b32 v1, s7, 59 +; GCN-NEXT: v_writelane_b32 v1, s8, 60 +; GCN-NEXT: v_writelane_b32 v1, s9, 61 +; GCN-NEXT: v_writelane_b32 v1, s10, 62 +; GCN-NEXT: v_writelane_b32 v1, s11, 63 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[84:91] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 9 -; GCN-NEXT: v_writelane_b32 v0, s1, 10 -; GCN-NEXT: v_writelane_b32 v0, s2, 11 -; GCN-NEXT: v_writelane_b32 v0, s3, 12 -; GCN-NEXT: v_writelane_b32 v0, s4, 13 -; GCN-NEXT: v_writelane_b32 v0, s5, 14 -; GCN-NEXT: v_writelane_b32 v0, s6, 15 -; GCN-NEXT: v_writelane_b32 v0, s7, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 25 -; GCN-NEXT: v_writelane_b32 v0, s1, 26 -; GCN-NEXT: v_writelane_b32 v0, s2, 27 -; GCN-NEXT: v_writelane_b32 v0, s3, 28 -; GCN-NEXT: v_writelane_b32 v0, s4, 29 -; GCN-NEXT: v_writelane_b32 v0, s5, 30 -; GCN-NEXT: v_writelane_b32 v0, s6, 31 -; GCN-NEXT: v_writelane_b32 v0, s7, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 33 -; GCN-NEXT: v_writelane_b32 v0, s1, 34 -; GCN-NEXT: v_writelane_b32 v0, s2, 35 -; GCN-NEXT: v_writelane_b32 v0, s3, 36 -; GCN-NEXT: v_writelane_b32 v0, s4, 37 -; GCN-NEXT: v_writelane_b32 v0, s5, 38 -; GCN-NEXT: v_writelane_b32 v0, s6, 39 -; GCN-NEXT: v_writelane_b32 v0, s7, 40 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 41 -; GCN-NEXT: v_writelane_b32 v0, s1, 42 -; GCN-NEXT: v_writelane_b32 v0, s2, 43 -; GCN-NEXT: v_writelane_b32 v0, s3, 44 -; GCN-NEXT: v_writelane_b32 v0, s4, 45 -; GCN-NEXT: v_writelane_b32 v0, s5, 46 -; GCN-NEXT: v_writelane_b32 v0, s6, 47 -; GCN-NEXT: v_writelane_b32 v0, s7, 48 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 49 -; GCN-NEXT: v_writelane_b32 v0, s1, 50 -; GCN-NEXT: v_writelane_b32 v0, s2, 51 -; GCN-NEXT: v_writelane_b32 v0, s3, 52 -; GCN-NEXT: v_writelane_b32 v0, s4, 53 -; GCN-NEXT: v_writelane_b32 v0, s5, 54 -; GCN-NEXT: v_writelane_b32 v0, s6, 55 -; GCN-NEXT: v_writelane_b32 v0, s7, 56 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s8, 0 -; GCN-NEXT: v_readlane_b32 s9, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s9, s8 -; GCN-NEXT: v_writelane_b32 v0, s12, 57 -; GCN-NEXT: v_writelane_b32 v0, s13, 58 -; GCN-NEXT: v_writelane_b32 v0, s14, 59 -; GCN-NEXT: v_writelane_b32 v0, s15, 60 -; GCN-NEXT: v_writelane_b32 v0, s16, 61 -; GCN-NEXT: v_writelane_b32 v0, s17, 62 -; GCN-NEXT: v_writelane_b32 v0, s18, 63 -; GCN-NEXT: v_writelane_b32 v1, s19, 0 -; GCN-NEXT: v_writelane_b32 v1, s20, 1 -; GCN-NEXT: v_writelane_b32 v1, s21, 2 -; GCN-NEXT: v_writelane_b32 v1, s22, 3 -; GCN-NEXT: v_writelane_b32 v1, s23, 4 -; GCN-NEXT: v_writelane_b32 v1, s24, 5 -; GCN-NEXT: v_writelane_b32 v1, s25, 6 -; GCN-NEXT: v_writelane_b32 v1, s26, 7 -; GCN-NEXT: v_writelane_b32 v1, s27, 8 -; GCN-NEXT: v_writelane_b32 v1, s36, 9 -; GCN-NEXT: v_writelane_b32 v1, s37, 10 -; GCN-NEXT: v_writelane_b32 v1, s38, 11 -; GCN-NEXT: v_writelane_b32 v1, s39, 12 -; GCN-NEXT: v_writelane_b32 v1, s40, 13 -; GCN-NEXT: v_writelane_b32 v1, s41, 14 -; GCN-NEXT: v_writelane_b32 v1, s42, 15 -; GCN-NEXT: v_writelane_b32 v1, s43, 16 -; GCN-NEXT: v_writelane_b32 v1, s44, 17 -; GCN-NEXT: v_writelane_b32 v1, s45, 18 -; GCN-NEXT: v_writelane_b32 v1, s46, 19 -; GCN-NEXT: v_writelane_b32 v1, s47, 20 -; GCN-NEXT: v_writelane_b32 v1, s48, 21 -; GCN-NEXT: v_writelane_b32 v1, s49, 22 -; GCN-NEXT: v_writelane_b32 v1, s50, 23 -; GCN-NEXT: v_writelane_b32 v1, s51, 24 -; GCN-NEXT: v_writelane_b32 v1, s52, 25 -; GCN-NEXT: v_writelane_b32 v1, s53, 26 -; GCN-NEXT: v_writelane_b32 v1, s54, 27 -; GCN-NEXT: v_writelane_b32 v1, s55, 28 -; GCN-NEXT: v_writelane_b32 v1, s56, 29 -; GCN-NEXT: v_writelane_b32 v1, s57, 30 -; GCN-NEXT: v_writelane_b32 v1, s58, 31 -; GCN-NEXT: v_writelane_b32 v1, s59, 32 -; GCN-NEXT: v_writelane_b32 v1, s60, 33 -; GCN-NEXT: v_writelane_b32 v1, s61, 34 -; GCN-NEXT: v_writelane_b32 v1, s62, 35 -; GCN-NEXT: v_writelane_b32 v1, s63, 36 -; GCN-NEXT: v_writelane_b32 v1, s64, 37 -; GCN-NEXT: v_writelane_b32 v1, s65, 38 -; GCN-NEXT: v_writelane_b32 v1, s66, 39 -; GCN-NEXT: v_writelane_b32 v1, s67, 40 -; GCN-NEXT: v_writelane_b32 v1, s68, 41 -; GCN-NEXT: v_writelane_b32 v1, s69, 42 -; GCN-NEXT: v_writelane_b32 v1, s70, 43 -; GCN-NEXT: v_writelane_b32 v1, s71, 44 -; GCN-NEXT: v_writelane_b32 v1, s72, 45 -; GCN-NEXT: v_writelane_b32 v1, s73, 46 -; GCN-NEXT: v_writelane_b32 v1, s74, 47 -; GCN-NEXT: v_writelane_b32 v1, s75, 48 -; GCN-NEXT: v_writelane_b32 v1, s76, 49 -; GCN-NEXT: v_writelane_b32 v1, s77, 50 -; GCN-NEXT: v_writelane_b32 v1, s78, 51 -; GCN-NEXT: v_writelane_b32 v1, s79, 52 -; GCN-NEXT: v_writelane_b32 v1, s80, 53 -; GCN-NEXT: v_writelane_b32 v1, s81, 54 -; GCN-NEXT: v_writelane_b32 v1, s82, 55 -; GCN-NEXT: v_writelane_b32 v1, s83, 56 -; GCN-NEXT: v_writelane_b32 v1, s84, 57 -; GCN-NEXT: v_writelane_b32 v1, s85, 58 -; GCN-NEXT: v_writelane_b32 v1, s86, 59 -; GCN-NEXT: v_writelane_b32 v1, s87, 60 -; GCN-NEXT: v_writelane_b32 v1, s88, 61 -; GCN-NEXT: v_writelane_b32 v1, s89, 62 -; GCN-NEXT: v_writelane_b32 v1, s90, 63 -; GCN-NEXT: v_writelane_b32 v2, s91, 0 -; GCN-NEXT: v_writelane_b32 v2, s0, 1 -; GCN-NEXT: v_writelane_b32 v2, s1, 2 -; GCN-NEXT: v_writelane_b32 v2, s2, 3 -; GCN-NEXT: v_writelane_b32 v2, s3, 4 -; GCN-NEXT: v_writelane_b32 v2, s4, 5 -; GCN-NEXT: v_writelane_b32 v2, s5, 6 -; GCN-NEXT: v_writelane_b32 v2, s6, 7 -; GCN-NEXT: v_writelane_b32 v2, s7, 8 +; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 +; GCN-NEXT: v_readlane_b32 s8, v1, 56 +; GCN-NEXT: v_readlane_b32 s9, v1, 57 +; GCN-NEXT: v_readlane_b32 s10, v1, 58 +; GCN-NEXT: v_readlane_b32 s11, v1, 59 +; GCN-NEXT: v_readlane_b32 s12, v1, 60 +; GCN-NEXT: v_readlane_b32 s13, v1, 61 +; GCN-NEXT: v_readlane_b32 s14, v1, 62 +; GCN-NEXT: v_readlane_b32 s15, v1, 63 +; GCN-NEXT: v_readlane_b32 s16, v1, 48 +; GCN-NEXT: v_readlane_b32 s17, v1, 49 +; GCN-NEXT: v_readlane_b32 s18, v1, 50 +; GCN-NEXT: v_readlane_b32 s19, v1, 51 +; GCN-NEXT: v_readlane_b32 s20, v1, 52 +; GCN-NEXT: v_readlane_b32 s21, v1, 53 +; GCN-NEXT: v_readlane_b32 s22, v1, 54 +; GCN-NEXT: v_readlane_b32 s23, v1, 55 +; GCN-NEXT: v_readlane_b32 s24, v1, 40 +; GCN-NEXT: v_readlane_b32 s25, v1, 41 +; GCN-NEXT: v_readlane_b32 s26, v1, 42 +; GCN-NEXT: v_readlane_b32 s27, v1, 43 +; GCN-NEXT: v_readlane_b32 s28, v1, 44 +; GCN-NEXT: v_readlane_b32 s29, v1, 45 +; GCN-NEXT: v_readlane_b32 s30, v1, 46 +; GCN-NEXT: v_readlane_b32 s31, v1, 47 +; GCN-NEXT: v_readlane_b32 s36, v1, 32 +; GCN-NEXT: v_readlane_b32 s37, v1, 33 +; GCN-NEXT: v_readlane_b32 s38, v1, 34 +; GCN-NEXT: v_readlane_b32 s39, v1, 35 +; GCN-NEXT: v_readlane_b32 s40, v1, 36 +; GCN-NEXT: v_readlane_b32 s41, v1, 37 +; GCN-NEXT: v_readlane_b32 s42, v1, 38 +; GCN-NEXT: v_readlane_b32 s43, v1, 39 +; GCN-NEXT: v_readlane_b32 s44, v1, 24 +; GCN-NEXT: v_readlane_b32 s45, v1, 25 +; GCN-NEXT: v_readlane_b32 s46, v1, 26 +; GCN-NEXT: v_readlane_b32 s47, v1, 27 +; GCN-NEXT: v_readlane_b32 s48, v1, 28 +; GCN-NEXT: v_readlane_b32 s49, v1, 29 +; GCN-NEXT: v_readlane_b32 s50, v1, 30 +; GCN-NEXT: v_readlane_b32 s51, v1, 31 +; GCN-NEXT: v_readlane_b32 s52, v1, 16 +; GCN-NEXT: v_readlane_b32 s53, v1, 17 +; GCN-NEXT: v_readlane_b32 s54, v1, 18 +; GCN-NEXT: v_readlane_b32 s55, v1, 19 +; GCN-NEXT: v_readlane_b32 s56, v1, 20 +; GCN-NEXT: v_readlane_b32 s57, v1, 21 +; GCN-NEXT: v_readlane_b32 s58, v1, 22 +; GCN-NEXT: v_readlane_b32 s59, v1, 23 +; GCN-NEXT: v_readlane_b32 s60, v1, 8 +; GCN-NEXT: v_readlane_b32 s61, v1, 9 +; GCN-NEXT: v_readlane_b32 s62, v1, 10 +; GCN-NEXT: v_readlane_b32 s63, v1, 11 +; GCN-NEXT: v_readlane_b32 s64, v1, 12 +; GCN-NEXT: v_readlane_b32 s65, v1, 13 +; GCN-NEXT: v_readlane_b32 s66, v1, 14 +; GCN-NEXT: v_readlane_b32 s67, v1, 15 +; GCN-NEXT: v_readlane_b32 s68, v1, 0 +; GCN-NEXT: v_readlane_b32 s69, v1, 1 +; GCN-NEXT: v_readlane_b32 s70, v1, 2 +; GCN-NEXT: v_readlane_b32 s71, v1, 3 +; GCN-NEXT: v_readlane_b32 s72, v1, 4 +; GCN-NEXT: v_readlane_b32 s73, v1, 5 +; GCN-NEXT: v_readlane_b32 s74, v1, 6 +; GCN-NEXT: v_readlane_b32 s75, v1, 7 +; GCN-NEXT: v_readlane_b32 s76, v0, 56 +; GCN-NEXT: v_readlane_b32 s77, v0, 57 +; GCN-NEXT: v_readlane_b32 s78, v0, 58 +; GCN-NEXT: v_readlane_b32 s79, v0, 59 +; GCN-NEXT: v_readlane_b32 s80, v0, 60 +; GCN-NEXT: v_readlane_b32 s81, v0, 61 +; GCN-NEXT: v_readlane_b32 s82, v0, 62 +; GCN-NEXT: v_readlane_b32 s83, v0, 63 +; GCN-NEXT: v_readlane_b32 s84, v0, 48 +; GCN-NEXT: v_readlane_b32 s85, v0, 49 +; GCN-NEXT: v_readlane_b32 s86, v0, 50 +; GCN-NEXT: v_readlane_b32 s87, v0, 51 +; GCN-NEXT: v_readlane_b32 s88, v0, 52 +; GCN-NEXT: v_readlane_b32 s89, v0, 53 +; GCN-NEXT: v_readlane_b32 s90, v0, 54 +; GCN-NEXT: v_readlane_b32 s91, v0, 55 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 57 -; GCN-NEXT: v_readlane_b32 s1, v0, 58 -; GCN-NEXT: v_readlane_b32 s2, v0, 59 -; GCN-NEXT: v_readlane_b32 s3, v0, 60 -; GCN-NEXT: v_readlane_b32 s4, v0, 61 -; GCN-NEXT: v_readlane_b32 s5, v0, 62 -; GCN-NEXT: v_readlane_b32 s6, v0, 63 -; GCN-NEXT: v_readlane_b32 s7, v1, 0 +; GCN-NEXT: v_readlane_b32 s0, v0, 8 +; GCN-NEXT: v_readlane_b32 s1, v0, 9 +; GCN-NEXT: v_readlane_b32 s2, v0, 10 +; GCN-NEXT: v_readlane_b32 s3, v0, 11 +; GCN-NEXT: v_readlane_b32 s4, v0, 12 +; GCN-NEXT: v_readlane_b32 s5, v0, 13 +; GCN-NEXT: v_readlane_b32 s6, v0, 14 +; GCN-NEXT: v_readlane_b32 s7, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 -; GCN-NEXT: v_readlane_b32 s2, v1, 11 -; GCN-NEXT: v_readlane_b32 s3, v1, 12 -; GCN-NEXT: v_readlane_b32 s4, v1, 13 -; GCN-NEXT: v_readlane_b32 s5, v1, 14 -; GCN-NEXT: v_readlane_b32 s6, v1, 15 -; GCN-NEXT: v_readlane_b32 s7, v1, 16 +; GCN-NEXT: v_readlane_b32 s0, v0, 24 +; GCN-NEXT: v_readlane_b32 s1, v0, 25 +; GCN-NEXT: v_readlane_b32 s2, v0, 26 +; GCN-NEXT: v_readlane_b32 s3, v0, 27 +; GCN-NEXT: v_readlane_b32 s4, v0, 28 +; GCN-NEXT: v_readlane_b32 s5, v0, 29 +; GCN-NEXT: v_readlane_b32 s6, v0, 30 +; GCN-NEXT: v_readlane_b32 s7, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 17 -; GCN-NEXT: v_readlane_b32 s1, v1, 18 -; GCN-NEXT: v_readlane_b32 s2, v1, 19 -; GCN-NEXT: v_readlane_b32 s3, v1, 20 -; GCN-NEXT: v_readlane_b32 s4, v1, 21 -; GCN-NEXT: v_readlane_b32 s5, v1, 22 -; GCN-NEXT: v_readlane_b32 s6, v1, 23 -; GCN-NEXT: v_readlane_b32 s7, v1, 24 +; GCN-NEXT: v_readlane_b32 s0, v0, 32 +; GCN-NEXT: v_readlane_b32 s1, v0, 33 +; GCN-NEXT: v_readlane_b32 s2, v0, 34 +; GCN-NEXT: v_readlane_b32 s3, v0, 35 +; GCN-NEXT: v_readlane_b32 s4, v0, 36 +; GCN-NEXT: v_readlane_b32 s5, v0, 37 +; GCN-NEXT: v_readlane_b32 s6, v0, 38 +; GCN-NEXT: v_readlane_b32 s7, v0, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 25 -; GCN-NEXT: v_readlane_b32 s1, v1, 26 -; GCN-NEXT: v_readlane_b32 s2, v1, 27 -; GCN-NEXT: v_readlane_b32 s3, v1, 28 -; GCN-NEXT: v_readlane_b32 s4, v1, 29 -; GCN-NEXT: v_readlane_b32 s5, v1, 30 -; GCN-NEXT: v_readlane_b32 s6, v1, 31 -; GCN-NEXT: v_readlane_b32 s7, v1, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 40 +; GCN-NEXT: v_readlane_b32 s1, v0, 41 +; GCN-NEXT: v_readlane_b32 s2, v0, 42 +; GCN-NEXT: v_readlane_b32 s3, v0, 43 +; GCN-NEXT: v_readlane_b32 s4, v0, 44 +; GCN-NEXT: v_readlane_b32 s5, v0, 45 +; GCN-NEXT: v_readlane_b32 s6, v0, 46 +; GCN-NEXT: v_readlane_b32 s7, v0, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 33 -; GCN-NEXT: v_readlane_b32 s1, v1, 34 -; GCN-NEXT: v_readlane_b32 s2, v1, 35 -; GCN-NEXT: v_readlane_b32 s3, v1, 36 -; GCN-NEXT: v_readlane_b32 s4, v1, 37 -; GCN-NEXT: v_readlane_b32 s5, v1, 38 -; GCN-NEXT: v_readlane_b32 s6, v1, 39 -; GCN-NEXT: v_readlane_b32 s7, v1, 40 +; GCN-NEXT: v_readlane_b32 s0, v2, 0 +; GCN-NEXT: v_readlane_b32 s1, v2, 1 +; GCN-NEXT: v_readlane_b32 s2, v2, 2 +; GCN-NEXT: v_readlane_b32 s3, v2, 3 +; GCN-NEXT: v_readlane_b32 s4, v2, 4 +; GCN-NEXT: v_readlane_b32 s5, v2, 5 +; GCN-NEXT: v_readlane_b32 s6, v2, 6 +; GCN-NEXT: v_readlane_b32 s7, v2, 7 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[84:91] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 41 -; GCN-NEXT: v_readlane_b32 s1, v1, 42 -; GCN-NEXT: v_readlane_b32 s2, v1, 43 -; GCN-NEXT: v_readlane_b32 s3, v1, 44 -; GCN-NEXT: v_readlane_b32 s4, v1, 45 -; GCN-NEXT: v_readlane_b32 s5, v1, 46 -; GCN-NEXT: v_readlane_b32 s6, v1, 47 -; GCN-NEXT: v_readlane_b32 s7, v1, 48 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[76:83] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 49 -; GCN-NEXT: v_readlane_b32 s1, v1, 50 -; GCN-NEXT: v_readlane_b32 s2, v1, 51 -; GCN-NEXT: v_readlane_b32 s3, v1, 52 -; GCN-NEXT: v_readlane_b32 s4, v1, 53 -; GCN-NEXT: v_readlane_b32 s5, v1, 54 -; GCN-NEXT: v_readlane_b32 s6, v1, 55 -; GCN-NEXT: v_readlane_b32 s7, v1, 56 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[68:75] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 57 -; GCN-NEXT: v_readlane_b32 s1, v1, 58 -; GCN-NEXT: v_readlane_b32 s2, v1, 59 -; GCN-NEXT: v_readlane_b32 s3, v1, 60 -; GCN-NEXT: v_readlane_b32 s4, v1, 61 -; GCN-NEXT: v_readlane_b32 s5, v1, 62 -; GCN-NEXT: v_readlane_b32 s6, v1, 63 -; GCN-NEXT: v_readlane_b32 s7, v2, 0 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[60:67] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 9 -; GCN-NEXT: v_readlane_b32 s1, v0, 10 -; GCN-NEXT: v_readlane_b32 s2, v0, 11 -; GCN-NEXT: v_readlane_b32 s3, v0, 12 -; GCN-NEXT: v_readlane_b32 s4, v0, 13 -; GCN-NEXT: v_readlane_b32 s5, v0, 14 -; GCN-NEXT: v_readlane_b32 s6, v0, 15 -; GCN-NEXT: v_readlane_b32 s7, v0, 16 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[52:59] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[44:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 25 -; GCN-NEXT: v_readlane_b32 s1, v0, 26 -; GCN-NEXT: v_readlane_b32 s2, v0, 27 -; GCN-NEXT: v_readlane_b32 s3, v0, 28 -; GCN-NEXT: v_readlane_b32 s4, v0, 29 -; GCN-NEXT: v_readlane_b32 s5, v0, 30 -; GCN-NEXT: v_readlane_b32 s6, v0, 31 -; GCN-NEXT: v_readlane_b32 s7, v0, 32 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[36:43] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[24:31] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 41 -; GCN-NEXT: v_readlane_b32 s1, v0, 42 -; GCN-NEXT: v_readlane_b32 s2, v0, 43 -; GCN-NEXT: v_readlane_b32 s3, v0, 44 -; GCN-NEXT: v_readlane_b32 s4, v0, 45 -; GCN-NEXT: v_readlane_b32 s5, v0, 46 -; GCN-NEXT: v_readlane_b32 s6, v0, 47 -; GCN-NEXT: v_readlane_b32 s7, v0, 48 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[16:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[8:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 1 -; GCN-NEXT: v_readlane_b32 s1, v2, 2 -; GCN-NEXT: v_readlane_b32 s2, v2, 3 -; GCN-NEXT: v_readlane_b32 s3, v2, 4 -; GCN-NEXT: v_readlane_b32 s4, v2, 5 -; GCN-NEXT: v_readlane_b32 s5, v2, 6 -; GCN-NEXT: v_readlane_b32 s6, v2, 7 -; GCN-NEXT: v_readlane_b32 s7, v2, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND @@ -448,191 +446,189 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s12, 8 +; GCN-NEXT: v_writelane_b32 v0, s13, 9 +; GCN-NEXT: v_writelane_b32 v0, s14, 10 +; GCN-NEXT: v_writelane_b32 v0, s15, 11 +; GCN-NEXT: v_writelane_b32 v0, s16, 12 +; GCN-NEXT: v_writelane_b32 v0, s17, 13 +; GCN-NEXT: v_writelane_b32 v0, s18, 14 +; GCN-NEXT: v_writelane_b32 v0, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: v_writelane_b32 v0, s12, 24 +; GCN-NEXT: v_writelane_b32 v0, s13, 25 +; GCN-NEXT: v_writelane_b32 v0, s14, 26 +; GCN-NEXT: v_writelane_b32 v0, s15, 27 +; GCN-NEXT: v_writelane_b32 v0, s16, 28 +; GCN-NEXT: v_writelane_b32 v0, s17, 29 +; GCN-NEXT: v_writelane_b32 v0, s18, 30 +; GCN-NEXT: v_writelane_b32 v0, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 32 +; GCN-NEXT: v_writelane_b32 v0, s5, 33 +; GCN-NEXT: v_writelane_b32 v0, s6, 34 +; GCN-NEXT: v_writelane_b32 v0, s7, 35 +; GCN-NEXT: v_writelane_b32 v0, s8, 36 +; GCN-NEXT: v_writelane_b32 v0, s9, 37 +; GCN-NEXT: v_writelane_b32 v0, s10, 38 +; GCN-NEXT: v_writelane_b32 v0, s11, 39 +; GCN-NEXT: v_writelane_b32 v0, s12, 40 +; GCN-NEXT: v_writelane_b32 v0, s13, 41 +; GCN-NEXT: v_writelane_b32 v0, s14, 42 +; GCN-NEXT: v_writelane_b32 v0, s15, 43 +; GCN-NEXT: v_writelane_b32 v0, s16, 44 +; GCN-NEXT: v_writelane_b32 v0, s17, 45 +; GCN-NEXT: v_writelane_b32 v0, s18, 46 +; GCN-NEXT: v_writelane_b32 v0, s19, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[36:51] +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[2:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s2, 8 +; GCN-NEXT: v_writelane_b32 v1, s3, 9 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: v_writelane_b32 v0, s12, 9 -; GCN-NEXT: v_writelane_b32 v0, s13, 10 -; GCN-NEXT: v_writelane_b32 v0, s14, 11 -; GCN-NEXT: v_writelane_b32 v0, s15, 12 -; GCN-NEXT: v_writelane_b32 v0, s16, 13 -; GCN-NEXT: v_writelane_b32 v0, s17, 14 -; GCN-NEXT: v_writelane_b32 v0, s18, 15 -; GCN-NEXT: v_writelane_b32 v0, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: v_writelane_b32 v0, s8, 25 -; GCN-NEXT: v_writelane_b32 v0, s9, 26 -; GCN-NEXT: v_writelane_b32 v0, s10, 27 -; GCN-NEXT: v_writelane_b32 v0, s11, 28 -; GCN-NEXT: v_writelane_b32 v0, s12, 29 -; GCN-NEXT: v_writelane_b32 v0, s13, 30 -; GCN-NEXT: v_writelane_b32 v0, s14, 31 -; GCN-NEXT: v_writelane_b32 v0, s15, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[8:9] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s10, 0 -; GCN-NEXT: v_readlane_b32 s11, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s11, s10 -; GCN-NEXT: v_writelane_b32 v0, s36, 33 -; GCN-NEXT: v_writelane_b32 v0, s37, 34 -; GCN-NEXT: v_writelane_b32 v0, s38, 35 -; GCN-NEXT: v_writelane_b32 v0, s39, 36 -; GCN-NEXT: v_writelane_b32 v0, s40, 37 -; GCN-NEXT: v_writelane_b32 v0, s41, 38 -; GCN-NEXT: v_writelane_b32 v0, s42, 39 -; GCN-NEXT: v_writelane_b32 v0, s43, 40 -; GCN-NEXT: v_writelane_b32 v0, s44, 41 -; GCN-NEXT: v_writelane_b32 v0, s45, 42 -; GCN-NEXT: v_writelane_b32 v0, s46, 43 -; GCN-NEXT: v_writelane_b32 v0, s47, 44 -; GCN-NEXT: v_writelane_b32 v0, s48, 45 -; GCN-NEXT: v_writelane_b32 v0, s49, 46 -; GCN-NEXT: v_writelane_b32 v0, s50, 47 -; GCN-NEXT: v_writelane_b32 v0, s51, 48 -; GCN-NEXT: v_writelane_b32 v0, s16, 49 -; GCN-NEXT: v_writelane_b32 v0, s17, 50 -; GCN-NEXT: v_writelane_b32 v0, s18, 51 -; GCN-NEXT: v_writelane_b32 v0, s19, 52 -; GCN-NEXT: v_writelane_b32 v0, s20, 53 -; GCN-NEXT: v_writelane_b32 v0, s21, 54 -; GCN-NEXT: v_writelane_b32 v0, s22, 55 -; GCN-NEXT: v_writelane_b32 v0, s23, 56 -; GCN-NEXT: v_writelane_b32 v0, s24, 57 -; GCN-NEXT: v_writelane_b32 v0, s25, 58 -; GCN-NEXT: v_writelane_b32 v0, s26, 59 -; GCN-NEXT: v_writelane_b32 v0, s27, 60 -; GCN-NEXT: v_writelane_b32 v0, s28, 61 -; GCN-NEXT: v_writelane_b32 v0, s29, 62 -; GCN-NEXT: v_writelane_b32 v0, s30, 63 -; GCN-NEXT: v_writelane_b32 v1, s31, 0 -; GCN-NEXT: v_writelane_b32 v1, s0, 1 -; GCN-NEXT: v_writelane_b32 v1, s1, 2 -; GCN-NEXT: v_writelane_b32 v1, s2, 3 -; GCN-NEXT: v_writelane_b32 v1, s3, 4 -; GCN-NEXT: v_writelane_b32 v1, s4, 5 -; GCN-NEXT: v_writelane_b32 v1, s5, 6 -; GCN-NEXT: v_writelane_b32 v1, s6, 7 -; GCN-NEXT: v_writelane_b32 v1, s7, 8 -; GCN-NEXT: v_writelane_b32 v1, s8, 9 -; GCN-NEXT: v_writelane_b32 v1, s9, 10 +; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 BB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 -; GCN-NEXT: v_readlane_b32 s8, v0, 9 -; GCN-NEXT: v_readlane_b32 s9, v0, 10 -; GCN-NEXT: v_readlane_b32 s10, v0, 11 -; GCN-NEXT: v_readlane_b32 s11, v0, 12 -; GCN-NEXT: v_readlane_b32 s12, v0, 13 -; GCN-NEXT: v_readlane_b32 s13, v0, 14 -; GCN-NEXT: v_readlane_b32 s14, v0, 15 -; GCN-NEXT: v_readlane_b32 s15, v0, 16 +; GCN-NEXT: v_readlane_b32 s16, v1, 8 +; GCN-NEXT: v_readlane_b32 s17, v1, 9 +; GCN-NEXT: v_readlane_b32 s20, v1, 0 +; GCN-NEXT: v_readlane_b32 s21, v1, 1 +; GCN-NEXT: v_readlane_b32 s22, v1, 2 +; GCN-NEXT: v_readlane_b32 s23, v1, 3 +; GCN-NEXT: v_readlane_b32 s24, v1, 4 +; GCN-NEXT: v_readlane_b32 s25, v1, 5 +; GCN-NEXT: v_readlane_b32 s26, v1, 6 +; GCN-NEXT: v_readlane_b32 s27, v1, 7 +; GCN-NEXT: v_readlane_b32 s36, v0, 32 +; GCN-NEXT: v_readlane_b32 s37, v0, 33 +; GCN-NEXT: v_readlane_b32 s38, v0, 34 +; GCN-NEXT: v_readlane_b32 s39, v0, 35 +; GCN-NEXT: v_readlane_b32 s40, v0, 36 +; GCN-NEXT: v_readlane_b32 s41, v0, 37 +; GCN-NEXT: v_readlane_b32 s42, v0, 38 +; GCN-NEXT: v_readlane_b32 s43, v0, 39 +; GCN-NEXT: v_readlane_b32 s44, v0, 40 +; GCN-NEXT: v_readlane_b32 s45, v0, 41 +; GCN-NEXT: v_readlane_b32 s46, v0, 42 +; GCN-NEXT: v_readlane_b32 s47, v0, 43 +; GCN-NEXT: v_readlane_b32 s48, v0, 44 +; GCN-NEXT: v_readlane_b32 s49, v0, 45 +; GCN-NEXT: v_readlane_b32 s50, v0, 46 +; GCN-NEXT: v_readlane_b32 s51, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 8 +; GCN-NEXT: v_readlane_b32 s9, v0, 9 +; GCN-NEXT: v_readlane_b32 s10, v0, 10 +; GCN-NEXT: v_readlane_b32 s11, v0, 11 +; GCN-NEXT: v_readlane_b32 s12, v0, 12 +; GCN-NEXT: v_readlane_b32 s13, v0, 13 +; GCN-NEXT: v_readlane_b32 s14, v0, 14 +; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 -; GCN-NEXT: v_readlane_b32 s8, v0, 41 -; GCN-NEXT: v_readlane_b32 s9, v0, 42 -; GCN-NEXT: v_readlane_b32 s10, v0, 43 -; GCN-NEXT: v_readlane_b32 s11, v0, 44 -; GCN-NEXT: v_readlane_b32 s12, v0, 45 -; GCN-NEXT: v_readlane_b32 s13, v0, 46 -; GCN-NEXT: v_readlane_b32 s14, v0, 47 -; GCN-NEXT: v_readlane_b32 s15, v0, 48 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s8, v0, 24 +; GCN-NEXT: v_readlane_b32 s9, v0, 25 +; GCN-NEXT: v_readlane_b32 s10, v0, 26 +; GCN-NEXT: v_readlane_b32 s11, v0, 27 +; GCN-NEXT: v_readlane_b32 s12, v0, 28 +; GCN-NEXT: v_readlane_b32 s13, v0, 29 +; GCN-NEXT: v_readlane_b32 s14, v0, 30 +; GCN-NEXT: v_readlane_b32 s15, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 -; GCN-NEXT: v_readlane_b32 s8, v0, 25 -; GCN-NEXT: v_readlane_b32 s9, v0, 26 -; GCN-NEXT: v_readlane_b32 s10, v0, 27 -; GCN-NEXT: v_readlane_b32 s11, v0, 28 -; GCN-NEXT: v_readlane_b32 s12, v0, 29 -; GCN-NEXT: v_readlane_b32 s13, v0, 30 -; GCN-NEXT: v_readlane_b32 s14, v0, 31 -; GCN-NEXT: v_readlane_b32 s15, v0, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 48 +; GCN-NEXT: v_readlane_b32 s1, v0, 49 +; GCN-NEXT: v_readlane_b32 s2, v0, 50 +; GCN-NEXT: v_readlane_b32 s3, v0, 51 +; GCN-NEXT: v_readlane_b32 s4, v0, 52 +; GCN-NEXT: v_readlane_b32 s5, v0, 53 +; GCN-NEXT: v_readlane_b32 s6, v0, 54 +; GCN-NEXT: v_readlane_b32 s7, v0, 55 +; GCN-NEXT: v_readlane_b32 s8, v0, 56 +; GCN-NEXT: v_readlane_b32 s9, v0, 57 +; GCN-NEXT: v_readlane_b32 s10, v0, 58 +; GCN-NEXT: v_readlane_b32 s11, v0, 59 +; GCN-NEXT: v_readlane_b32 s12, v0, 60 +; GCN-NEXT: v_readlane_b32 s13, v0, 61 +; GCN-NEXT: v_readlane_b32 s14, v0, 62 +; GCN-NEXT: v_readlane_b32 s15, v0, 63 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[20:27] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ; use s[16:17] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 -; GCN-NEXT: v_readlane_b32 s8, v0, 57 -; GCN-NEXT: v_readlane_b32 s9, v0, 58 -; GCN-NEXT: v_readlane_b32 s10, v0, 59 -; GCN-NEXT: v_readlane_b32 s11, v0, 60 -; GCN-NEXT: v_readlane_b32 s12, v0, 61 -; GCN-NEXT: v_readlane_b32 s13, v0, 62 -; GCN-NEXT: v_readlane_b32 s14, v0, 63 -; GCN-NEXT: v_readlane_b32 s15, v1, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND @@ -667,12 +663,12 @@ ret: define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s58, -1 -; GCN-NEXT: s_mov_b32 s59, 0xe8f000 -; GCN-NEXT: s_add_u32 s56, s56, s3 -; GCN-NEXT: s_addc_u32 s57, s57, 0 +; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s54, -1 +; GCN-NEXT: s_mov_b32 s55, 0xe8f000 +; GCN-NEXT: s_add_u32 s52, s52, s3 +; GCN-NEXT: s_addc_u32 s53, s53, 0 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND @@ -689,180 +685,176 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 0 +; GCN-NEXT: v_writelane_b32 v31, s5, 1 +; GCN-NEXT: v_writelane_b32 v31, s6, 2 +; GCN-NEXT: v_writelane_b32 v31, s7, 3 +; GCN-NEXT: v_writelane_b32 v31, s8, 4 +; GCN-NEXT: v_writelane_b32 v31, s9, 5 +; GCN-NEXT: v_writelane_b32 v31, s10, 6 +; GCN-NEXT: v_writelane_b32 v31, s11, 7 +; GCN-NEXT: v_writelane_b32 v31, s12, 8 +; GCN-NEXT: v_writelane_b32 v31, s13, 9 +; GCN-NEXT: v_writelane_b32 v31, s14, 10 +; GCN-NEXT: v_writelane_b32 v31, s15, 11 +; GCN-NEXT: v_writelane_b32 v31, s16, 12 +; GCN-NEXT: v_writelane_b32 v31, s17, 13 +; GCN-NEXT: v_writelane_b32 v31, s18, 14 +; GCN-NEXT: v_writelane_b32 v31, s19, 15 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[36:51] +; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v31, s0, 0 -; GCN-NEXT: v_writelane_b32 v31, s4, 1 -; GCN-NEXT: v_writelane_b32 v31, s5, 2 -; GCN-NEXT: v_writelane_b32 v31, s6, 3 -; GCN-NEXT: v_writelane_b32 v31, s7, 4 -; GCN-NEXT: v_writelane_b32 v31, s8, 5 -; GCN-NEXT: v_writelane_b32 v31, s9, 6 -; GCN-NEXT: v_writelane_b32 v31, s10, 7 -; GCN-NEXT: v_writelane_b32 v31, s11, 8 -; GCN-NEXT: v_writelane_b32 v31, s12, 9 -; GCN-NEXT: v_writelane_b32 v31, s13, 10 -; GCN-NEXT: v_writelane_b32 v31, s14, 11 -; GCN-NEXT: v_writelane_b32 v31, s15, 12 -; GCN-NEXT: v_writelane_b32 v31, s16, 13 -; GCN-NEXT: v_writelane_b32 v31, s17, 14 -; GCN-NEXT: v_writelane_b32 v31, s18, 15 -; GCN-NEXT: v_writelane_b32 v31, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[34:35] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s33, 0 -; GCN-NEXT: v_readlane_b32 s52, v31, 0 -; GCN-NEXT: s_cmp_lg_u32 s52, s33 -; GCN-NEXT: v_writelane_b32 v31, s36, 17 -; GCN-NEXT: v_writelane_b32 v31, s37, 18 -; GCN-NEXT: v_writelane_b32 v31, s38, 19 -; GCN-NEXT: v_writelane_b32 v31, s39, 20 -; GCN-NEXT: v_writelane_b32 v31, s40, 21 -; GCN-NEXT: v_writelane_b32 v31, s41, 22 -; GCN-NEXT: v_writelane_b32 v31, s42, 23 -; GCN-NEXT: v_writelane_b32 v31, s43, 24 -; GCN-NEXT: v_writelane_b32 v31, s44, 25 -; GCN-NEXT: v_writelane_b32 v31, s45, 26 -; GCN-NEXT: v_writelane_b32 v31, s46, 27 -; GCN-NEXT: v_writelane_b32 v31, s47, 28 -; GCN-NEXT: v_writelane_b32 v31, s48, 29 -; GCN-NEXT: v_writelane_b32 v31, s49, 30 -; GCN-NEXT: v_writelane_b32 v31, s50, 31 -; GCN-NEXT: v_writelane_b32 v31, s51, 32 -; GCN-NEXT: v_writelane_b32 v31, s0, 33 -; GCN-NEXT: v_writelane_b32 v31, s1, 34 -; GCN-NEXT: v_writelane_b32 v31, s2, 35 -; GCN-NEXT: v_writelane_b32 v31, s3, 36 -; GCN-NEXT: v_writelane_b32 v31, s4, 37 -; GCN-NEXT: v_writelane_b32 v31, s5, 38 -; GCN-NEXT: v_writelane_b32 v31, s6, 39 -; GCN-NEXT: v_writelane_b32 v31, s7, 40 -; GCN-NEXT: v_writelane_b32 v31, s8, 41 -; GCN-NEXT: v_writelane_b32 v31, s9, 42 -; GCN-NEXT: v_writelane_b32 v31, s10, 43 -; GCN-NEXT: v_writelane_b32 v31, s11, 44 -; GCN-NEXT: v_writelane_b32 v31, s12, 45 -; GCN-NEXT: v_writelane_b32 v31, s13, 46 -; GCN-NEXT: v_writelane_b32 v31, s14, 47 -; GCN-NEXT: v_writelane_b32 v31, s15, 48 -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 -; GCN-NEXT: v_writelane_b32 v0, s16, 0 -; GCN-NEXT: v_writelane_b32 v0, s17, 1 -; GCN-NEXT: v_writelane_b32 v0, s18, 2 -; GCN-NEXT: v_writelane_b32 v0, s19, 3 -; GCN-NEXT: v_writelane_b32 v0, s20, 4 -; GCN-NEXT: v_writelane_b32 v0, s21, 5 -; GCN-NEXT: v_writelane_b32 v0, s22, 6 -; GCN-NEXT: v_writelane_b32 v0, s23, 7 -; GCN-NEXT: v_writelane_b32 v0, s24, 8 -; GCN-NEXT: v_writelane_b32 v0, s25, 9 -; GCN-NEXT: v_writelane_b32 v0, s26, 10 -; GCN-NEXT: v_writelane_b32 v0, s27, 11 -; GCN-NEXT: v_writelane_b32 v0, s28, 12 -; GCN-NEXT: v_writelane_b32 v0, s29, 13 -; GCN-NEXT: v_writelane_b32 v0, s30, 14 -; GCN-NEXT: v_writelane_b32 v0, s31, 15 -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v31, s34, 49 -; GCN-NEXT: v_writelane_b32 v31, s35, 50 -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 -; GCN-NEXT: s_cbranch_scc1 BB2_2 -; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v31, 1 -; GCN-NEXT: v_readlane_b32 s1, v31, 2 -; GCN-NEXT: v_readlane_b32 s2, v31, 3 -; GCN-NEXT: v_readlane_b32 s3, v31, 4 -; GCN-NEXT: v_readlane_b32 s4, v31, 5 -; GCN-NEXT: v_readlane_b32 s5, v31, 6 -; GCN-NEXT: v_readlane_b32 s6, v31, 7 -; GCN-NEXT: v_readlane_b32 s7, v31, 8 -; GCN-NEXT: v_readlane_b32 s8, v31, 9 -; GCN-NEXT: v_readlane_b32 s9, v31, 10 -; GCN-NEXT: v_readlane_b32 s10, v31, 11 -; GCN-NEXT: v_readlane_b32 s11, v31, 12 -; GCN-NEXT: v_readlane_b32 s12, v31, 13 -; GCN-NEXT: v_readlane_b32 s13, v31, 14 -; GCN-NEXT: v_readlane_b32 s14, v31, 15 -; GCN-NEXT: v_readlane_b32 s15, v31, 16 +; GCN-NEXT: v_writelane_b32 v31, s4, 16 +; GCN-NEXT: v_writelane_b32 v31, s5, 17 +; GCN-NEXT: v_writelane_b32 v31, s6, 18 +; GCN-NEXT: v_writelane_b32 v31, s7, 19 +; GCN-NEXT: v_writelane_b32 v31, s8, 20 +; GCN-NEXT: v_writelane_b32 v31, s9, 21 +; GCN-NEXT: v_writelane_b32 v31, s10, 22 +; GCN-NEXT: v_writelane_b32 v31, s11, 23 +; GCN-NEXT: v_writelane_b32 v31, s12, 24 +; GCN-NEXT: v_writelane_b32 v31, s13, 25 +; GCN-NEXT: v_writelane_b32 v31, s14, 26 +; GCN-NEXT: v_writelane_b32 v31, s15, 27 +; GCN-NEXT: v_writelane_b32 v31, s16, 28 +; GCN-NEXT: v_writelane_b32 v31, s17, 29 +; GCN-NEXT: v_writelane_b32 v31, s18, 30 +; GCN-NEXT: v_writelane_b32 v31, s19, 31 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 17 -; GCN-NEXT: v_readlane_b32 s1, v31, 18 -; GCN-NEXT: v_readlane_b32 s2, v31, 19 -; GCN-NEXT: v_readlane_b32 s3, v31, 20 -; GCN-NEXT: v_readlane_b32 s4, v31, 21 -; GCN-NEXT: v_readlane_b32 s5, v31, 22 -; GCN-NEXT: v_readlane_b32 s6, v31, 23 -; GCN-NEXT: v_readlane_b32 s7, v31, 24 -; GCN-NEXT: v_readlane_b32 s8, v31, 25 -; GCN-NEXT: v_readlane_b32 s9, v31, 26 -; GCN-NEXT: v_readlane_b32 s10, v31, 27 -; GCN-NEXT: v_readlane_b32 s11, v31, 28 -; GCN-NEXT: v_readlane_b32 s12, v31, 29 -; GCN-NEXT: v_readlane_b32 s13, v31, 30 -; GCN-NEXT: v_readlane_b32 s14, v31, 31 -; GCN-NEXT: v_readlane_b32 s15, v31, 32 +; GCN-NEXT: v_writelane_b32 v31, s4, 32 +; GCN-NEXT: v_writelane_b32 v31, s5, 33 +; GCN-NEXT: v_writelane_b32 v31, s6, 34 +; GCN-NEXT: v_writelane_b32 v31, s7, 35 +; GCN-NEXT: v_writelane_b32 v31, s8, 36 +; GCN-NEXT: v_writelane_b32 v31, s9, 37 +; GCN-NEXT: v_writelane_b32 v31, s10, 38 +; GCN-NEXT: v_writelane_b32 v31, s11, 39 +; GCN-NEXT: v_writelane_b32 v31, s12, 40 +; GCN-NEXT: v_writelane_b32 v31, s13, 41 +; GCN-NEXT: v_writelane_b32 v31, s14, 42 +; GCN-NEXT: v_writelane_b32 v31, s15, 43 +; GCN-NEXT: v_writelane_b32 v31, s16, 44 +; GCN-NEXT: v_writelane_b32 v31, s17, 45 +; GCN-NEXT: v_writelane_b32 v31, s18, 46 +; GCN-NEXT: v_writelane_b32 v31, s19, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 48 +; GCN-NEXT: v_writelane_b32 v31, s5, 49 +; GCN-NEXT: v_writelane_b32 v31, s6, 50 +; GCN-NEXT: v_writelane_b32 v31, s7, 51 +; GCN-NEXT: v_writelane_b32 v31, s8, 52 +; GCN-NEXT: v_writelane_b32 v31, s9, 53 +; GCN-NEXT: v_writelane_b32 v31, s10, 54 +; GCN-NEXT: v_writelane_b32 v31, s11, 55 +; GCN-NEXT: v_writelane_b32 v31, s12, 56 +; GCN-NEXT: v_writelane_b32 v31, s13, 57 +; GCN-NEXT: v_writelane_b32 v31, s14, 58 +; GCN-NEXT: v_writelane_b32 v31, s15, 59 +; GCN-NEXT: v_writelane_b32 v31, s16, 60 +; GCN-NEXT: v_writelane_b32 v31, s17, 61 +; GCN-NEXT: v_writelane_b32 v31, s18, 62 +; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[2:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s2, 0 +; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: s_mov_b64 s[2:3], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[2:3] +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_lg_u32 s0, s1 +; GCN-NEXT: s_cbranch_scc1 BB2_2 +; GCN-NEXT: ; %bb.1: ; %bb0 +; GCN-NEXT: v_readlane_b32 s36, v31, 32 +; GCN-NEXT: v_readlane_b32 s37, v31, 33 +; GCN-NEXT: v_readlane_b32 s38, v31, 34 +; GCN-NEXT: v_readlane_b32 s39, v31, 35 +; GCN-NEXT: v_readlane_b32 s40, v31, 36 +; GCN-NEXT: v_readlane_b32 s41, v31, 37 +; GCN-NEXT: v_readlane_b32 s42, v31, 38 +; GCN-NEXT: v_readlane_b32 s43, v31, 39 +; GCN-NEXT: v_readlane_b32 s44, v31, 40 +; GCN-NEXT: v_readlane_b32 s45, v31, 41 +; GCN-NEXT: v_readlane_b32 s46, v31, 42 +; GCN-NEXT: v_readlane_b32 s47, v31, 43 +; GCN-NEXT: v_readlane_b32 s48, v31, 44 +; GCN-NEXT: v_readlane_b32 s49, v31, 45 +; GCN-NEXT: v_readlane_b32 s50, v31, 46 +; GCN-NEXT: v_readlane_b32 s51, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s16, v31, 0 +; GCN-NEXT: v_readlane_b32 s17, v31, 1 +; GCN-NEXT: v_readlane_b32 s18, v31, 2 +; GCN-NEXT: v_readlane_b32 s19, v31, 3 +; GCN-NEXT: v_readlane_b32 s20, v31, 4 +; GCN-NEXT: v_readlane_b32 s21, v31, 5 +; GCN-NEXT: v_readlane_b32 s22, v31, 6 +; GCN-NEXT: v_readlane_b32 s23, v31, 7 +; GCN-NEXT: v_readlane_b32 s24, v31, 8 +; GCN-NEXT: v_readlane_b32 s25, v31, 9 +; GCN-NEXT: v_readlane_b32 s26, v31, 10 +; GCN-NEXT: v_readlane_b32 s27, v31, 11 +; GCN-NEXT: v_readlane_b32 s28, v31, 12 +; GCN-NEXT: v_readlane_b32 s29, v31, 13 +; GCN-NEXT: v_readlane_b32 s30, v31, 14 +; GCN-NEXT: v_readlane_b32 s31, v31, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[16:31] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 33 -; GCN-NEXT: v_readlane_b32 s1, v31, 34 -; GCN-NEXT: v_readlane_b32 s2, v31, 35 -; GCN-NEXT: v_readlane_b32 s3, v31, 36 -; GCN-NEXT: v_readlane_b32 s4, v31, 37 -; GCN-NEXT: v_readlane_b32 s5, v31, 38 -; GCN-NEXT: v_readlane_b32 s6, v31, 39 -; GCN-NEXT: v_readlane_b32 s7, v31, 40 -; GCN-NEXT: v_readlane_b32 s8, v31, 41 -; GCN-NEXT: v_readlane_b32 s9, v31, 42 -; GCN-NEXT: v_readlane_b32 s10, v31, 43 -; GCN-NEXT: v_readlane_b32 s11, v31, 44 -; GCN-NEXT: v_readlane_b32 s12, v31, 45 -; GCN-NEXT: v_readlane_b32 s13, v31, 46 -; GCN-NEXT: v_readlane_b32 s14, v31, 47 -; GCN-NEXT: v_readlane_b32 s15, v31, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s4, v31, 48 +; GCN-NEXT: v_readlane_b32 s5, v31, 49 +; GCN-NEXT: v_readlane_b32 s6, v31, 50 +; GCN-NEXT: v_readlane_b32 s7, v31, 51 +; GCN-NEXT: v_readlane_b32 s8, v31, 52 +; GCN-NEXT: v_readlane_b32 s9, v31, 53 +; GCN-NEXT: v_readlane_b32 s10, v31, 54 +; GCN-NEXT: v_readlane_b32 s11, v31, 55 +; GCN-NEXT: v_readlane_b32 s12, v31, 56 +; GCN-NEXT: v_readlane_b32 s13, v31, 57 +; GCN-NEXT: v_readlane_b32 s14, v31, 58 +; GCN-NEXT: v_readlane_b32 s15, v31, 59 +; GCN-NEXT: v_readlane_b32 s16, v31, 60 +; GCN-NEXT: v_readlane_b32 s17, v31, 61 +; GCN-NEXT: v_readlane_b32 s18, v31, 62 +; GCN-NEXT: v_readlane_b32 s19, v31, 63 ; GCN-NEXT: s_mov_b64 s[0:1], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s0, v0, 0 ; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; use s[36:51] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 49 -; GCN-NEXT: v_readlane_b32 s1, v31, 50 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll index 73d837efa9f44..56a675e3ddad1 100644 --- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -11,8 +11,8 @@ define void @child_function() #0 { ; GCN: v_writelane_b32 v255, s30, 0 ; GCN: v_writelane_b32 v255, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s4, v255, 0 -; GCN: v_readlane_b32 s5, v255, 1 +; GCN: v_readlane_b32 s30, v255, 0 +; GCN: v_readlane_b32 s31, v255, 1 ; GCN: v_readlane_b32 s33, v255, 2 ; GCN: ; NumVgprs: 256 @@ -57,8 +57,8 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 { ; GCN: v_writelane_b32 v254, s30, 0 ; GCN: v_writelane_b32 v254, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s4, v254, 0 -; GCN: v_readlane_b32 s5, v254, 1 +; GCN: v_readlane_b32 s30, v254, 0 +; GCN: v_readlane_b32 s31, v254, 1 ; GCN: v_readlane_b32 s33, v254, 2 define void @reserve_lowest_available_vgpr() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index 2138af8099f92..c817b977eb9d4 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -13,25 +13,25 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; SPILLED: S_NOP 0, implicit-def renamable $agpr1 + ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; SPILLED: SI_SPILL_A32_SAVE killed $agpr1, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1 + ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 ; EXPANDED-LABEL: name: spill_restore_agpr32 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0 - ; EXPANDED: S_NOP 0, implicit-def renamable $agpr1 ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec - ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0 + ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) @@ -41,7 +41,7 @@ body: | ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec - ; EXPANDED: S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 bb.0: S_NOP 0, implicit-def %0:agpr_32 S_NOP 0, implicit-def %1:agpr_32 @@ -72,7 +72,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; EXPANDED-LABEL: name: spill_restore_agpr64 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -89,7 +89,7 @@ body: | ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1 bb.0: S_NOP 0, implicit-def %0:areg_64 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -118,6 +118,7 @@ body: | ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: bb.2: + ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) ; SPILLED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; SPILLED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; SPILLED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -134,8 +135,7 @@ body: | ; SPILLED: S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; SPILLED: S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247 ; SPILLED: S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 - ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0 ; EXPANDED-LABEL: name: spill_restore_agpr32_used_all_vgprs ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -149,6 +149,8 @@ body: | ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) ; EXPANDED: bb.2: + ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec ; EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; EXPANDED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -165,9 +167,7 @@ body: | ; EXPANDED: S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; EXPANDED: S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247 ; EXPANDED: S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 - ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - ; EXPANDED: S_NOP 0, implicit renamable $agpr0 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0 bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 @@ -214,7 +214,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; EXPANDED-LABEL: name: spill_restore_agpr96 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -233,7 +233,7 @@ body: | ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 bb.0: S_NOP 0, implicit-def %0:areg_96 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -263,7 +263,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; EXPANDED-LABEL: name: spill_restore_agpr128 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -284,7 +284,7 @@ body: | ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 bb.0: S_NOP 0, implicit-def %0:areg_128 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -314,7 +314,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED-LABEL: name: spill_restore_agpr160 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -337,7 +337,7 @@ body: | ; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 bb.0: S_NOP 0, implicit-def %0:areg_160 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -367,7 +367,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED-LABEL: name: spill_restore_agpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -392,7 +392,7 @@ body: | ; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 bb.0: S_NOP 0, implicit-def %0:areg_192 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -422,7 +422,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED-LABEL: name: spill_restore_agpr256 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -451,7 +451,7 @@ body: | ; EXPANDED: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $agpr6 = V_ACCVGPR_WRITE_B32 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $agpr7 = V_ACCVGPR_WRITE_B32 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 bb.0: S_NOP 0, implicit-def %0:areg_256 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -481,7 +481,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED-LABEL: name: spill_restore_agpr512 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -526,7 +526,7 @@ body: | ; EXPANDED: $agpr13 = V_ACCVGPR_WRITE_B32 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $agpr14 = V_ACCVGPR_WRITE_B32 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $agpr15 = V_ACCVGPR_WRITE_B32 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 bb.0: S_NOP 0, implicit-def %0:areg_512 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -556,7 +556,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED-LABEL: name: spill_restore_agpr1024 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -633,7 +633,7 @@ body: | ; EXPANDED: $agpr29 = V_ACCVGPR_WRITE_B32 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $agpr30 = V_ACCVGPR_WRITE_B32 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $agpr31 = V_ACCVGPR_WRITE_B32 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 bb.0: S_NOP 0, implicit-def %0:areg_1024 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index 9b629a5f91110..474461d2ae128 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -1,28 +1,32 @@ -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s ; XXX - Why does it like to use vcc? ; GCN-LABEL: {{^}}spill_m0: -; GCN-DAG: s_cmp_lg_u32 +; GCN: #ASMSTART +; GCN-NEXT: s_mov_b32 m0, 0 +; GCN-NEXT: #ASMEND +; GCN-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2 +; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]] -; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVMEM-DAG: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 -; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Spill +; TOVMEM: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 +; TOVMEM: s_mov_b32 [[COPY_EXEC_LO:s[0-9]+]], exec_lo +; TOVMEM: s_mov_b32 exec_lo, 1 +; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Spill +; TOVMEM: s_mov_b32 exec_lo, [[COPY_EXEC_LO]] ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ENDIF]]: -; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2 +; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], [[M0_LANE]] ; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]] -; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Reload +; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Reload ; TOVMEM: s_waitcnt vmcnt(0) ; TOVMEM: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]], 0 ; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]] @@ -48,8 +52,6 @@ endif: ; m0 is killed, so it isn't necessary during the entry block spill to preserve it ; GCN-LABEL: {{^}}spill_kill_m0_lds: -; GCN: s_mov_b32 m0, s6 -; GCN: v_interp_mov_f32 ; GCN-NOT: v_readlane_b32 m0 ; GCN-NOT: s_buffer_store_dword m0 @@ -79,10 +81,11 @@ endif: ; preds = %else, %if ; Force save and restore of m0 during SMEM spill ; GCN-LABEL: {{^}}m0_unavailable_spill: +; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}} ; GCN: ; def m0, 1 -; GCN: s_mov_b32 m0, s0 +; GCN: s_mov_b32 m0, [[REG0]] ; GCN: v_interp_mov_f32 ; GCN: ; clobber m0 @@ -124,16 +127,17 @@ endif: } ; GCN-LABEL: {{^}}restore_m0_lds: -; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] -; TOSMEM: s_cmp_eq_u32 ; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_buffer_store_dword s1, s[88:91], m0 ; 4-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_cmp_eq_u32 ; TOSMEM: s_cbranch_scc1 ; TOSMEM: s_mov_b32 m0, -1 @@ -150,6 +154,13 @@ endif: ; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload ; FIXME-TOSMEM-NOT: m0 + +; TOSMEM: s_mov_b32 [[REG1:s[0-9]+]], m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload +; TOSMEM: s_mov_b32 m0, [[REG1]] +; TOSMEM: s_mov_b32 m0, -1 + ; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM-NOT: m0 ; TOSMEM: s_mov_b32 m0, s2 diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir index 25a2d6ebb0068..9344b0cb8c8a3 100644 --- a/llvm/test/CodeGen/AMDGPU/spill192.mir +++ b/llvm/test/CodeGen/AMDGPU/spill192.mir @@ -24,7 +24,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED-LABEL: name: spill_restore_sgpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -49,7 +49,7 @@ body: | ; EXPANDED: $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3 ; EXPANDED: $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4 ; EXPANDED: $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5 - ; EXPANDED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 bb.0: S_NOP 0, implicit-def %0:sgpr_192 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -79,7 +79,7 @@ body: | ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; EXPANDED-LABEL: name: spill_restore_vgpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -91,7 +91,7 @@ body: | ; EXPANDED: S_NOP 1 ; EXPANDED: bb.2: ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) - ; EXPANDED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 bb.0: S_NOP 0, implicit-def %0:vreg_192 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir new file mode 100644 index 0000000000000..9f5b4793ecfb4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s + +--- +name: bar +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: bar + ; CHECK: liveins: $vgpr0 + ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec + ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc + %0:vgpr_32 = COPY $vgpr0 + V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec + %3:sreg_64_xexec = COPY $vcc + %1:sreg_64_xexec = COPY $vcc + %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0, implicit $vgpr0, implicit %3 + +... diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index 1a48e76a241bb..0193313ff213c 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -69,8 +69,8 @@ if: merge: %merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ] ; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]] -; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]] ; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]] +; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]] ; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]] %tmp138 = icmp eq i32 %tmp122, %merge_value %tmp139 = sext i1 %tmp138 to i32 @@ -82,7 +82,7 @@ merge: } ; GFX9-LABEL: {{^}}called: -define i32 @called(i32 %a) noinline { +define hidden i32 @called(i32 %a) noinline { ; GFX9: v_add_u32_e32 v1, v0, v0 %add = add i32 %a, %a ; GFX9: v_mul_lo_u32 v0, v1, v0 @@ -94,10 +94,15 @@ define i32 @called(i32 %a) noinline { ; GFX9-LABEL: {{^}}call: define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s0 -; GFX9-O3: v_mov_b32_e32 v2, s0 +; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]] +; GFX9-O0-DAG: s_mov_b32 s0, 0{{$}} +; GFX9-O0-DAG: v_mov_b32_e32 v0, [[ARG]] + +; GFX9-O3: v_mov_b32_e32 v2, [[ARG]] + + ; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -107,12 +112,11 @@ define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { %tmp134 = call i32 @called(i32 %tmp107) ; GFX9-O0: buffer_load_dword v1 ; GFX9-O3: v_mov_b32_e32 v1, v0 -; GFX9-O0: v_add_u32_e32 v0, v0, v1 +; GFX9-O0: v_add_u32_e32 v1, v0, v1 ; GFX9-O3: v_add_u32_e32 v1, v1, v2 %tmp136 = add i32 %tmp134, %tmp107 %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136) -; GFX9-O0: buffer_store_dword v2 -; GFX9-O3: buffer_store_dword v0 +; GFX9: buffer_store_dword v0 call void @llvm.amdgcn.raw.buffer.store.i32(i32 %tmp137, <4 x i32> %tmp14, i32 4, i32 0, i32 0) ret void } @@ -127,19 +131,24 @@ define i64 @called_i64(i64 %a) noinline { ; GFX9-LABEL: {{^}}call_i64: define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s0 -; GFX9-O0: v_mov_b32_e32 v1, s1 -; GFX9-O3: v_mov_b32_e32 v7, s1 -; GFX9-O3: v_mov_b32_e32 v6, s0 -; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s3 +; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}} + +; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}} +; GFX9-O0: v_mov_b32_e32 v1, s[[ARG_LO]] +; GFX9-O0: v_mov_b32_e32 v2, s[[ARG_HI]] + +; GFX9-O3-DAG: v_mov_b32_e32 v7, s[[ARG_HI]] +; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]] + +; GFX9: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s[[ZERO_LO]] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s[[ZERO_HI]] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) -; GFX9-O0: buffer_store_dword v0 ; GFX9-O0: buffer_store_dword v1 +; GFX9-O0: buffer_store_dword v2 ; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) ; GFX9-O0: buffer_load_dword v4 diff --git a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll index b15145d85f179..662a78c4dfa65 100644 --- a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -8,6 +8,9 @@ define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) #0 !dbg !4 { entry: %"alloca point" = bitcast i32 0 to i32 + br label %realentry + +realentry: call void @llvm.dbg.value(metadata i32 %i, metadata !21, metadata !DIExpression()), !dbg !22 call void @llvm.dbg.value(metadata %struct.SVal* %location, metadata !23, metadata !DIExpression()), !dbg !22 %tmp = icmp ne i32 %i, 0, !dbg !25 diff --git a/llvm/test/CodeGen/ARM/Windows/alloca.ll b/llvm/test/CodeGen/ARM/Windows/alloca.ll index 7db854df72967..ec3b130b3d8bf 100644 --- a/llvm/test/CodeGen/ARM/Windows/alloca.ll +++ b/llvm/test/CodeGen/ARM/Windows/alloca.ll @@ -17,10 +17,11 @@ entry: ; CHECK: bl num_entries ; Any register is actually valid here, but turns out we use lr, ; because we do not have the kill flag on R0. -; CHECK: movs [[R1:r1]], #7 -; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2 -; CHECK: bic [[R0]], [[R0]], #4 -; CHECK: lsrs r4, [[R0]], #2 +; CHECK: mov [[R0:r[0-9]+]], r0 +; CHECK: movs [[R1:r[0-9]+]], #7 +; CHECK: add.w [[R2:r[0-9]+]], [[R1]], [[R0]], lsl #2 +; CHECK: bic [[R2]], [[R2]], #4 +; CHECK: lsrs r4, [[R2]], #2 ; CHECK: bl __chkstk ; CHECK: sub.w sp, sp, r4 diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll index 9e9a93e19b6a4..29336c2f7ffdf 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll @@ -7,12 +7,10 @@ ; CHECK_LABEL: main: ; CHECK: ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}} ; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4] -; CHECK: mov [[R4:r[0-9]+]], [[R2]] -; CHECK-NEXT: mov [[R5:r[0-9]+]], [[R1]] -; CHECK: ldr [[R2]], {{\[}}[[R1]]{{\]}} -; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4] -; CHECK: mov [[R6:r[0-9]+]], [[R2]] -; CHECK-NEXT: mov [[R7:r[0-9]+]], [[R1]] +; CHECK: mov [[R4:r[0-9]+]], [[R1]] +; CHECK: ldr [[R5:r[0-9]+]], {{\[}}[[R1]]{{\]}} +; CHECK-NEXT: ldr [[R6:r[0-9]+]], {{\[}}[[R1]], #4] +; CHECK: mov [[R7:r[0-9]+]], [[R6]] define arm_aapcs_vfpcc i32 @main() #0 { entry: diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll index d3696cfe39a8e..1bc15dce20813 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll @@ -7,19 +7,21 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexb [[OLD:[lr0-9]+]], [r0] +; CHECK: ldrexb [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexb [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: ; Materialisation of a boolean is done with sub/clz/lsr ; CHECK: uxtb [[CMP1:r[0-9]+]], [[DESIRED]] -; CHECK: sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]] +; CHECK: sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]] ; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] ; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish @@ -29,19 +31,21 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexh [[OLD:[lr0-9]+]], [r0] +; CHECK: ldrexh [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexh [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: ; Materialisation of a boolean is done with sub/clz/lsr ; CHECK: uxth [[CMP1:r[0-9]+]], [[DESIRED]] -; CHECK: sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]] +; CHECK: sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]] ; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] ; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish @@ -51,13 +55,15 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrex [[OLD:r[0-9]+]], [r0] +; CHECK: ldrex [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strex [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -72,14 +78,15 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: +; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLDLO]], r6 ; CHECK: cmpeq [[OLDHI]], r7 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:[lr0-9]+]], r4, r5, [r0] +; CHECK: strexd [[STATUS:[lr0-9]+]], r8, r9, [r1] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -90,14 +97,15 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind define { i64, i1 } @test_nontrivial_args(i64* %addr, i64 %desired, i64 %new) { ; CHECK-LABEL: test_nontrivial_args: +; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLDLO]], {{r[0-9]+}} ; CHECK: cmpeq [[OLDHI]], {{r[0-9]+}} ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r0] +; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: diff --git a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll index d3c5057e38214..a0241d95a7c6f 100644 --- a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll +++ b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll @@ -14,21 +14,21 @@ for.body.lr.ph: ; preds = %entry for.body: ; preds = %for.body, %for.body.lr.ph ; SOURCE-SCHED: ldr ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr ; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: ldr ; SOURCE-SCHED: str ; SOURCE-SCHED: str ; SOURCE-SCHED: str ; SOURCE-SCHED: str -; SOURCE-SCHED: ldr ; SOURCE-SCHED: bl -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr +; SOURCE-SCHED: add ; SOURCE-SCHED: cmp ; SOURCE-SCHED: bne %i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ] diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll index 8b31e7a51d514..1c9ffb1775aa4 100644 --- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll +++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll @@ -6,8 +6,7 @@ ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18) ; CHECK-NEXT: DW_AT_name {{.*}} "mydata" ; Radar 9331779 diff --git a/llvm/test/CodeGen/ARM/fast-isel-call.ll b/llvm/test/CodeGen/ARM/fast-isel-call.ll index 9c313c727aee5..293c268c5359b 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-call.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-call.ll @@ -41,38 +41,31 @@ define void @foo(i8 %a, i16 %b) nounwind { ; ARM: foo ; THUMB: foo ;; Materialize i1 1 -; ARM: movw r2, #1 +; ARM: movw [[REG0:r[0-9]+]], #1 +; THUMB: movs [[REG0:r[0-9]+]], #1 ;; zero-ext -; ARM: and r2, r2, #1 -; THUMB: and r2, r2, #1 +; ARM: and [[REG1:r[0-9]+]], [[REG0]], #1 +; THUMB: and [[REG1:r[0-9]+]], [[REG0]], #1 %1 = call i32 @t0(i1 zeroext 1) -; ARM: sxtb r2, r1 -; ARM: mov r0, r2 -; THUMB: sxtb r2, r1 -; THUMB: mov r0, r2 +; ARM: sxtb r0, {{r[0-9]+}} +; THUMB: sxtb r0, {{r[0-9]+}} %2 = call i32 @t1(i8 signext %a) -; ARM: and r2, r1, #255 -; ARM: mov r0, r2 -; THUMB: and r2, r1, #255 -; THUMB: mov r0, r2 +; ARM: and r0, {{r[0-9]+}}, #255 +; THUMB: and r0, {{r[0-9]+}}, #255 %3 = call i32 @t2(i8 zeroext %a) -; ARM: sxth r2, r1 -; ARM: mov r0, r2 -; THUMB: sxth r2, r1 -; THUMB: mov r0, r2 +; ARM: sxth r0, {{r[0-9]+}} +; THUMB: sxth r0, {{r[0-9]+}} %4 = call i32 @t3(i16 signext %b) -; ARM: uxth r2, r1 -; ARM: mov r0, r2 -; THUMB: uxth r2, r1 -; THUMB: mov r0, r2 +; ARM: uxth r0, {{r[0-9]+}} +; THUMB: uxth r0, {{r[0-9]+}} %5 = call i32 @t4(i16 zeroext %b) ;; A few test to check materialization ;; Note: i1 1 was materialized with t1 call -; ARM: movw r1, #255 +; ARM: movw {{r[0-9]+}}, #255 %6 = call i32 @t2(i8 zeroext 255) -; ARM: movw r1, #65535 -; THUMB: movw r1, #65535 +; ARM: movw {{r[0-9]+}}, #65535 +; THUMB: movw {{r[0-9]+}}, #65535 %7 = call i32 @t4(i16 zeroext 65535) ret void } @@ -112,10 +105,9 @@ entry: ; ARM: bl {{_?}}bar ; ARM-LONG-LABEL: @t10 -; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} -; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; ARM-LONG-MACHO: str [[R]], [r7, [[SLOT:#[-0-9]+]]] @ 4-byte Spill -; ARM-LONG-MACHO: ldr [[R:l?r[0-9]*]], [r7, [[SLOT]]] @ 4-byte Reload +; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; ARM-LONG-MACHO: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}} +; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], {{\[}}[[R1]]] ; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar ; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} @@ -138,11 +130,9 @@ entry: ; THUMB-DAG: str.w [[R4]], [sp, #4] ; THUMB: bl {{_?}}bar ; THUMB-LONG-LABEL: @t10 -; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} -; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}} -; THUMB-LONG: str [[R]], [sp, [[SLOT:#[-0-9]+]]] @ 4-byte Spill -; THUMB-LONG: ldr.w [[R:l?r[0-9]*]], [sp, [[SLOT]]] @ 4-byte Reload +; THUMB-LONG: {{(movw)|(ldr.n)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; THUMB-LONG: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}} +; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], {{\[}}[[R1]]{{\]}} ; THUMB-LONG: blx [[R]] %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 diff --git a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll index b308c4482d275..bda4c6d47237c 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -55,16 +55,13 @@ define void @t2() nounwind ssp { ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG1:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG1:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG1]], :upper16:temp -; ARM: add r1, r0, #4 -; ARM: add r0, r0, #16 -; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; ARM: mov r0, r1 -; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; ARM: add r0, [[REG1]], #4 +; ARM: add r1, [[REG1]], #16 ; ARM: movw r2, #17 ; ARM: bl {{_?}}memcpy ; ARM-LONG-LABEL: t2: @@ -80,12 +77,9 @@ define void @t2() nounwind ssp { ; THUMB-LABEL: t2: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: adds r1, r0, #4 -; THUMB: adds r0, #16 -; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; THUMB: mov r0, r1 -; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: adds r0, [[REG1]], #4 +; THUMB: adds r1, #16 ; THUMB: movs r2, #17 ; THUMB: bl {{_?}}memcpy ; THUMB-LONG-LABEL: t2: @@ -104,15 +98,14 @@ define void @t3() nounwind ssp { ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp -; ARM: add r1, r0, #4 -; ARM: add r0, r0, #16 -; ARM: mov r0, r1 +; ARM: add r0, [[REG0]], #4 +; ARM: add r1, [[REG0]], #16 ; ARM: movw r2, #10 ; ARM: bl {{_?}}memmove ; ARM-LONG-LABEL: t3: @@ -128,12 +121,9 @@ define void @t3() nounwind ssp { ; THUMB-LABEL: t3: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: adds r1, r0, #4 -; THUMB: adds r0, #16 -; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; THUMB: mov r0, r1 -; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: adds r0, [[REG1]], #4 +; THUMB: adds r1, #16 ; THUMB: movs r2, #10 ; THUMB: bl {{_?}}memmove ; THUMB-LONG-LABEL: t3: @@ -150,28 +140,28 @@ define void @t4() nounwind ssp { ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp -; ARM: ldr r1, [r0, #16] -; ARM: str r1, [r0, #4] -; ARM: ldr r1, [r0, #20] -; ARM: str r1, [r0, #8] -; ARM: ldrh r1, [r0, #24] -; ARM: strh r1, [r0, #12] +; ARM: ldr [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: str [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldr [[REG2:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: str [[REG2]], {{\[}}[[REG0]], #8] +; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strh [[REG3]], {{\[}}[[REG0]], #12] ; ARM: bx lr ; THUMB-LABEL: t4: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldr r1, [r0, #16] -; THUMB: str r1, [r0, #4] -; THUMB: ldr r1, [r0, #20] -; THUMB: str r1, [r0, #8] -; THUMB: ldrh r1, [r0, #24] -; THUMB: strh r1, [r0, #12] +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: ldr [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16] +; THUMB: str [[REG2]], {{\[}}[[REG1]], #4] +; THUMB: ldr [[REG3:r[0-9]+]], {{\[}}[[REG1]], #20] +; THUMB: str [[REG3]], {{\[}}[[REG1]], #8] +; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #24] +; THUMB: strh [[REG4]], {{\[}}[[REG1]], #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void @@ -184,36 +174,36 @@ define void @t5() nounwind ssp { ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] - -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp - -; ARM: ldrh r1, [r0, #16] -; ARM: strh r1, [r0, #4] -; ARM: ldrh r1, [r0, #18] -; ARM: strh r1, [r0, #6] -; ARM: ldrh r1, [r0, #20] -; ARM: strh r1, [r0, #8] -; ARM: ldrh r1, [r0, #22] -; ARM: strh r1, [r0, #10] -; ARM: ldrh r1, [r0, #24] -; ARM: strh r1, [r0, #12] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] + +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp + +; ARM: ldrh [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: strh [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG0]], #18] +; ARM: strh [[REG2]], {{\[}}[[REG0]], #6] +; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: strh [[REG3]], {{\[}}[[REG0]], #8] +; ARM: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG0]], #22] +; ARM: strh [[REG4]], {{\[}}[[REG0]], #10] +; ARM: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strh [[REG5]], {{\[}}[[REG0]], #12] ; ARM: bx lr ; THUMB-LABEL: t5: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldrh r1, [r0, #16] -; THUMB: strh r1, [r0, #4] -; THUMB: ldrh r1, [r0, #18] -; THUMB: strh r1, [r0, #6] -; THUMB: ldrh r1, [r0, #20] -; THUMB: strh r1, [r0, #8] -; THUMB: ldrh r1, [r0, #22] -; THUMB: strh r1, [r0, #10] -; THUMB: ldrh r1, [r0, #24] -; THUMB: strh r1, [r0, #12] +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16] +; THUMB: strh [[REG2]], {{\[}}[[REG1]], #4] +; THUMB: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG1]], #18] +; THUMB: strh [[REG3]], {{\[}}[[REG1]], #6] +; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #20] +; THUMB: strh [[REG4]], {{\[}}[[REG1]], #8] +; THUMB: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG1]], #22] +; THUMB: strh [[REG5]], {{\[}}[[REG1]], #10] +; THUMB: ldrh [[REG6:r[0-9]+]], {{\[}}[[REG1]], #24] +; THUMB: strh [[REG6]], {{\[}}[[REG1]], #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void @@ -224,56 +214,56 @@ define void @t6() nounwind ssp { ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] - -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp - -; ARM: ldrb r1, [r0, #16] -; ARM: strb r1, [r0, #4] -; ARM: ldrb r1, [r0, #17] -; ARM: strb r1, [r0, #5] -; ARM: ldrb r1, [r0, #18] -; ARM: strb r1, [r0, #6] -; ARM: ldrb r1, [r0, #19] -; ARM: strb r1, [r0, #7] -; ARM: ldrb r1, [r0, #20] -; ARM: strb r1, [r0, #8] -; ARM: ldrb r1, [r0, #21] -; ARM: strb r1, [r0, #9] -; ARM: ldrb r1, [r0, #22] -; ARM: strb r1, [r0, #10] -; ARM: ldrb r1, [r0, #23] -; ARM: strb r1, [r0, #11] -; ARM: ldrb r1, [r0, #24] -; ARM: strb r1, [r0, #12] -; ARM: ldrb r1, [r0, #25] -; ARM: strb r1, [r0, #13] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] + +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp + +; ARM: ldrb [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: strb [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #17] +; ARM: strb [[REG2]], {{\[}}[[REG0]], #5] +; ARM: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #18] +; ARM: strb [[REG3]], {{\[}}[[REG0]], #6] +; ARM: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #19] +; ARM: strb [[REG4]], {{\[}}[[REG0]], #7] +; ARM: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: strb [[REG5]], {{\[}}[[REG0]], #8] +; ARM: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #21] +; ARM: strb [[REG6]], {{\[}}[[REG0]], #9] +; ARM: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #22] +; ARM: strb [[REG7]], {{\[}}[[REG0]], #10] +; ARM: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #23] +; ARM: strb [[REG8]], {{\[}}[[REG0]], #11] +; ARM: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strb [[REG9]], {{\[}}[[REG0]], #12] +; ARM: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #25] +; ARM: strb [[REG10]], {{\[}}[[REG0]], #13] ; ARM: bx lr ; THUMB-LABEL: t6: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldrb r1, [r0, #16] -; THUMB: strb r1, [r0, #4] -; THUMB: ldrb r1, [r0, #17] -; THUMB: strb r1, [r0, #5] -; THUMB: ldrb r1, [r0, #18] -; THUMB: strb r1, [r0, #6] -; THUMB: ldrb r1, [r0, #19] -; THUMB: strb r1, [r0, #7] -; THUMB: ldrb r1, [r0, #20] -; THUMB: strb r1, [r0, #8] -; THUMB: ldrb r1, [r0, #21] -; THUMB: strb r1, [r0, #9] -; THUMB: ldrb r1, [r0, #22] -; THUMB: strb r1, [r0, #10] -; THUMB: ldrb r1, [r0, #23] -; THUMB: strb r1, [r0, #11] -; THUMB: ldrb r1, [r0, #24] -; THUMB: strb r1, [r0, #12] -; THUMB: ldrb r1, [r0, #25] -; THUMB: strb r1, [r0, #13] +; THUMB: ldr [[REG0:r[0-9]+]], [r0] +; THUMB: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #16] +; THUMB: strb [[REG2]], {{\[}}[[REG0]], #4] +; THUMB: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #17] +; THUMB: strb [[REG3]], {{\[}}[[REG0]], #5] +; THUMB: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #18] +; THUMB: strb [[REG4]], {{\[}}[[REG0]], #6] +; THUMB: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #19] +; THUMB: strb [[REG5]], {{\[}}[[REG0]], #7] +; THUMB: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #20] +; THUMB: strb [[REG6]], {{\[}}[[REG0]], #8] +; THUMB: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #21] +; THUMB: strb [[REG7]], {{\[}}[[REG0]], #9] +; THUMB: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #22] +; THUMB: strb [[REG8]], {{\[}}[[REG0]], #10] +; THUMB: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #23] +; THUMB: strb [[REG9]], {{\[}}[[REG0]], #11] +; THUMB: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #24] +; THUMB: strb [[REG10]], {{\[}}[[REG0]], #12] +; THUMB: ldrb [[REG11:r[0-9]+]], {{\[}}[[REG0]], #25] +; THUMB: strb [[REG11]], {{\[}}[[REG0]], #13] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll index f24100b36db9e..95942c271c9ca 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -2,7 +2,7 @@ define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t1 +; THUMB-LABEL: t1: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-4] @@ -11,7 +11,7 @@ entry: define i32 @t2(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t2 +; THUMB-LABEL: t2: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-252] @@ -20,7 +20,7 @@ entry: define i32 @t3(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t3 +; THUMB-LABEL: t3: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0] @@ -29,7 +29,7 @@ entry: define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t4 +; THUMB-LABEL: t4: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2] @@ -38,7 +38,7 @@ entry: define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t5 +; THUMB-LABEL: t5: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254] @@ -47,7 +47,7 @@ entry: define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t6 +; THUMB-LABEL: t6: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0] @@ -56,7 +56,7 @@ entry: define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t7 +; THUMB-LABEL: t7: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1] @@ -65,7 +65,7 @@ entry: define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t8 +; THUMB-LABEL: t8: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255] @@ -74,7 +74,7 @@ entry: define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t9 +; THUMB-LABEL: t9: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0] @@ -83,81 +83,96 @@ entry: define void @t10(i32* nocapture %ptr) nounwind { entry: -; THUMB: t10 +; THUMB-LABEL: t10: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0, #-4] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-4] ret void } define void @t11(i32* nocapture %ptr) nounwind { entry: -; THUMB: t11 +; THUMB-LABEL: t11: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0, #-252] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-252] ret void } define void @t12(i32* nocapture %ptr) nounwind { entry: -; THUMB: t12 +; THUMB-LABEL: t12: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]]] ret void } define void @t13(i16* nocapture %ptr) nounwind { entry: -; THUMB: t13 +; THUMB-LABEL: t13: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0, #-2] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-2] ret void } define void @t14(i16* nocapture %ptr) nounwind { entry: -; THUMB: t14 +; THUMB-LABEL: t14: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0, #-254] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-254] ret void } define void @t15(i16* nocapture %ptr) nounwind { entry: -; THUMB: t15 +; THUMB-LABEL: t15: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]]] ret void } define void @t16(i8* nocapture %ptr) nounwind { entry: -; THUMB: t16 +; THUMB-LABEL: t16: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0, #-1] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-1] ret void } define void @t17(i8* nocapture %ptr) nounwind { entry: -; THUMB: t17 +; THUMB-LABEL: t17: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0, #-255] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-255] ret void } define void @t18(i8* nocapture %ptr) nounwind { entry: -; THUMB: t18 +; THUMB-LABEL: t18: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]]] ret void } diff --git a/llvm/test/CodeGen/ARM/fast-isel-select.ll b/llvm/test/CodeGen/ARM/fast-isel-select.ll index 0da63499f3024..70987422dfde7 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-select.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-select.ll @@ -21,14 +21,12 @@ entry: define i32 @t2(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t2 -; ARM: tst r0, #1 -; ARM: moveq r{{[1-9]}}, #20 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: moveq {{r[0-9]+}}, #20 ; THUMB-LABEL: t2 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: moveq r{{[1-9]}}, #20 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: moveq {{r[0-9]+}}, #20 %0 = select i1 %c, i32 %a, i32 20 ret i32 %0 } @@ -43,7 +41,7 @@ entry: ; THUMB: tst.w r0, #1 ; THUMB: it ne ; THUMB: movne r2, r1 -; THUMB: add.w r0, r2, r1 +; THUMB: adds r0, r2, r1 %0 = select i1 %c, i32 %a, i32 %b %1 = add i32 %0, %a ret i32 %1 @@ -67,14 +65,12 @@ entry: define i32 @t5(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t5 -; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, #1 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: mvneq {{r[0-9]+}}, #1 ; THUMB: t5 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: mvneq r{{[1-9]}}, #1 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: mvneq {{r[0-9]+}}, #1 %0 = select i1 %c, i32 %a, i32 -2 ret i32 %0 } @@ -83,14 +79,12 @@ entry: define i32 @t6(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t6 -; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, #978944 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: mvneq {{r[0-9]+}}, #978944 ; THUMB: t6 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: mvneq r{{[1-9]}}, #978944 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: mvneq {{r[0-9]+}}, #978944 %0 = select i1 %c, i32 %a, i32 -978945 ret i32 %0 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll index ffc3d9a05d88d..3a9011ba622aa 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll @@ -17,26 +17,24 @@ entry: %4 = load i32, i32* %n, align 4 ; ARM: VarArg ; ARM: mov [[FP:r[0-9]+]], sp -; ARM: sub sp, sp, #{{(36|40)}} +; ARM: sub sp, sp, #32 ; ARM: ldr r1, {{\[}}[[FP]], #-4] ; ARM: ldr r2, {{\[}}[[FP]], #-8] ; ARM: ldr r3, {{\[}}[[FP]], #-12] -; ARM: ldr [[Ra:r[0-9]+]], {{\[}}[[FP]], #-16] -; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #{{(16|20)}}] -; ARM: movw [[Rc:[lr]+[0-9]*]], #5 -; Ra got spilled -; ARM: mov r0, [[Rc]] -; ARM: str {{.*}}, [sp] +; ARM: ldr [[Ra:r[0-9]+|lr]], [sp, #16] +; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12] +; ARM: movw r0, #5 +; ARM: str [[Ra]], [sp] ; ARM: str [[Rb]], [sp, #4] ; ARM: bl {{_?CallVariadic}} -; THUMB: sub sp, #{{36}} -; THUMB: ldr r1, [sp, #32] -; THUMB: ldr r2, [sp, #28] -; THUMB: ldr r3, [sp, #24] -; THUMB: ldr {{[a-z0-9]+}}, [sp, #20] -; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16] -; THUMB: str.w {{[a-z0-9]+}}, [sp] -; THUMB: str.w {{[a-z0-9]+}}, [sp, #4] +; THUMB: sub sp, #32 +; THUMB: ldr r1, [sp, #28] +; THUMB: ldr r2, [sp, #24] +; THUMB: ldr r3, [sp, #20] +; THUMB: ldr.w [[Ra:r[0-9]+]], [sp, #16] +; THUMB: ldr.w [[Rb:r[0-9]+]], [sp, #12] +; THUMB: str.w [[Ra]], [sp] +; THUMB: str.w [[Rb]], [sp, #4] ; THUMB: bl {{_?}}CallVariadic %call = call i32 (i32, ...) @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) store i32 %call, i32* %tmp, align 4 diff --git a/llvm/test/CodeGen/ARM/ldrd.ll b/llvm/test/CodeGen/ARM/ldrd.ll index b4325c78dbf2a..2bba841413803 100644 --- a/llvm/test/CodeGen/ARM/ldrd.ll +++ b/llvm/test/CodeGen/ARM/ldrd.ll @@ -81,11 +81,12 @@ return: ; preds = %bb, %entry ; CHECK-LABEL: Func1: define void @Func1() nounwind ssp "frame-pointer"="all" { entry: -; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} -; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}} +; A8: movw [[BASER:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} +; A8: movt [[BASER]], :upper16:{{.*}}TestVar{{.*}} +; A8: ldr [[BASE:r[0-9]+]], {{\[}}[[BASER]]] ; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4] -; A8-NEXT: add [[FIELD1]], [[FIELD2]] -; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}} +; A8-NEXT: add [[FIELD2]], [[FIELD1]] +; A8-NEXT: str [[FIELD2]], {{\[}}[[BASE]]{{\]}} ; CONSERVATIVE-NOT: ldrd %orig_blocks = alloca [256 x i16], align 2 %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll index 529775df5fd7d..67ea37aa35033 100644 --- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll +++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll @@ -14,17 +14,17 @@ define i32 @vec_to_int() { ; CHECK-NEXT: ldr r0, [r0] ; CHECK-NEXT: @ implicit-def: $d17 ; CHECK-NEXT: vmov.32 d17[0], r0 -; CHECK-NEXT: vrev32.16 d17, d17 +; CHECK-NEXT: vrev32.16 d18, d17 ; CHECK-NEXT: vrev16.8 d16, d16 -; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vmov.f64 d19, d17 -; CHECK-NEXT: vstmia sp, {d18, d19} @ 16-byte Spill +; CHECK-NEXT: @ kill: def $d16 killed $d16 def $q8 +; CHECK-NEXT: vmov.f64 d17, d18 +; CHECK-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_1: @ %bb.1 ; CHECK-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; CHECK-NEXT: vrev32.16 q9, q8 -; CHECK-NEXT: @ kill: def $d19 killed $d19 killed $q9 -; CHECK-NEXT: vmov.32 r0, d19[0] +; CHECK-NEXT: vrev32.16 q8, q8 +; CHECK-NEXT: vmov.f64 d16, d17 +; CHECK-NEXT: vmov.32 r0, d16[0] ; CHECK-NEXT: add sp, sp, #28 ; CHECK-NEXT: pop {r4} ; CHECK-NEXT: bx lr @@ -41,15 +41,15 @@ bb.1: define i16 @int_to_vec(i80 %in) { ; CHECK-LABEL: int_to_vec: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: @ kill: def $r2 killed $r1 +; CHECK-NEXT: @ kill: def $r2 killed $r0 ; CHECK-NEXT: lsl r0, r0, #16 ; CHECK-NEXT: orr r0, r0, r1, lsr #16 -; CHECK-NEXT: @ implicit-def: $d16 -; CHECK-NEXT: vmov.32 d16[0], r0 -; CHECK-NEXT: @ implicit-def: $q9 -; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vrev32.16 q8, q9 +; CHECK-NEXT: @ implicit-def: $d18 +; CHECK-NEXT: vmov.32 d18[0], r0 +; CHECK-NEXT: @ implicit-def: $q8 +; CHECK-NEXT: vmov.f64 d16, d18 +; CHECK-NEXT: vrev32.16 q8, q8 ; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 ; CHECK-NEXT: vmov.u16 r0, d16[0] ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/pr47454.ll b/llvm/test/CodeGen/ARM/pr47454.ll index d36a29c4e77ce..399de44ec731a 100644 --- a/llvm/test/CodeGen/ARM/pr47454.ll +++ b/llvm/test/CodeGen/ARM/pr47454.ll @@ -16,23 +16,23 @@ define internal fastcc void @main() { ; CHECK-NEXT: ldrh r0, [r11, #-2] ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: vstr s0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vstr s0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl getConstant ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: vldr s0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: uxth r1, r1 +; CHECK-NEXT: vmov s0, r1 ; CHECK-NEXT: uxth r0, r0 -; CHECK-NEXT: vmov s0, r0 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: uxth r1, r0 -; CHECK-NEXT: vmov s1, r1 +; CHECK-NEXT: vmov s1, r0 ; CHECK-NEXT: bl isEqual ; CHECK-NEXT: mov sp, r11 ; CHECK-NEXT: pop {r11, pc} diff --git a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll index 02ee9c067f223..f2d9a5c0f7fd3 100644 --- a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll +++ b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll @@ -3,11 +3,12 @@ ; Verify that the offset assigned to the stack protector is at the top of the ; frame, covering the locals. ; CHECK-LABEL: fn: -; CHECK: sub sp, sp, #32 +; CHECK: sub sp, sp, #24 ; CHECK-NEXT: sub sp, sp, #65536 ; CHECK-NEXT: ldr r1, .LCPI0_0 -; CHECK-NEXT: ldr r2, [r1] +; CHECK-NEXT: str r1, [sp, #8] +; CHECK-NEXT: ldr r1, [r1] ; CHECK-NEXT: add lr, sp, #65536 -; CHECK-NEXT: str r2, [lr, #28] +; CHECK-NEXT: str r1, [lr, #20] ; CHECK: .LCPI0_0: ; CHECK-NEXT: .long __stack_chk_guard diff --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll index d96bc0249b42f..7968230ccab2e 100644 --- a/llvm/test/CodeGen/ARM/swifterror.ll +++ b/llvm/test/CodeGen/ARM/swifterror.ll @@ -21,9 +21,9 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-O0: mov r{{.*}}, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID2:r[0-9]+]], r0 -; CHECK-O0: mov [[ID:r[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [r0, #8] ; CHECK-O0: mov r8, [[ID2]] +; CHECK-O0: mov [[ID:r[0-9]+]], #1 +; CHECK-O0: strb [[ID]], {{\[}}[[ID2]], #8] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -49,16 +49,16 @@ define float @caller(i8* %error_ref) { ; CHECK-O0-LABEL: caller: ; spill r0 ; CHECK-O0-DAG: mov r8, #0 -; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]] +; CHECK-O0-DAG: str r0, [sp[[SLOT:(, #[0-9]+)?]]] ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[TMP:r[0-9]+]], r8 -; CHECK-O0: str [[TMP]], [sp] +; CHECK-O0: str [[TMP]], [sp[[SLOT2:(, #[0-9]+)?]]] ; CHECK-O0: bne +; CHECK-O0: ldr [[ID:r[0-9]+]], [sp[[SLOT]]] ; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] -; CHECK-O0: ldr [[ID:r[0-9]+]], [sp, [[SLOT]]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; reload r0 -; CHECK-O0: ldr r0, [sp] +; CHECK-O0: ldr r0, [sp[[SLOT2]]] ; CHECK-O0: free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -98,14 +98,14 @@ define float @caller2(i8* %error_ref) { ; CHECK-O0-DAG: mov r8, #0 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov r{{.*}}, r8 -; CHECK-O0: str r0, [sp] +; CHECK-O0: str r0, [sp{{(, #[0-9]+)?}}] ; CHECK-O0: bne ; CHECK-O0: ble -; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] ; reload r0 ; CHECK-O0: ldr [[ID:r[0-9]+]], +; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-O0: ldr r0, [sp] +; CHECK-O0: ldr r0, [sp{{(, #[0-9]+)?}}] ; CHECK-O0: free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -143,16 +143,15 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: cmp r0, #0 ; spill to stack ; CHECK-O0: str r8 +; CHECK-O0: cmp r0, #0 ; CHECK-O0: beq ; CHECK-O0: mov r0, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID:r[0-9]+]], r0 ; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1 -; CHECK-O0: strb [[ID2]], [r0, #8] -; CHECK-O0: mov r8, [[ID]] +; CHECK-O0: strb [[ID2]], {{\[}}[[ID]], #8] ; reload from stack ; CHECK-O0: ldr r8 entry: @@ -233,18 +232,18 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4] ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: mov r{{.*}}, #16 +; CHECK-O0-DAG: mov r{{.*}}, #16 ; spill to stack: sret and val1 ; CHECK-O0-DAG: str r0 ; CHECK-O0-DAG: str r1 ; CHECK-O0: malloc -; CHECK-O0: mov [[ID:r[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [r0, #8] ; reload from stack: sret and val1 ; CHECK-O0: ldr ; CHECK-O0: ldr -; CHECK-O0: str r{{.*}}, [{{.*}}, #4] -; CHECK-O0: mov r8 +; CHECK-O0-DAG: mov r8 +; CHECK-O0-DAG: mov [[ID:r[0-9]+]], #1 +; CHECK-O0-DAG: strb [[ID]], [{{r[0-9]+}}, #8] +; CHECK-O0-DAG: str r{{.*}}, [{{.*}}, #4] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -271,16 +270,15 @@ define float @caller3(i8* %error_ref) { ; CHECK-O0-LABEL: caller3: ; CHECK-O0-DAG: mov r8, #0 -; CHECK-O0-DAG: mov r0 ; CHECK-O0-DAG: mov r1 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:r[0-9]+]], r8 -; CHECK-O0: cmp r8 ; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]] +; CHECK-O0: cmp r8 ; CHECK-O0: bne ; Access part of the error object and save it to error_ref -; CHECK-O0: ldrb [[CODE:r[0-9]+]] ; CHECK-O0: ldr [[ID:r[0-9]+]] +; CHECK-O0: ldrb [[CODE:r[0-9]+]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK-O0: ldr r0, [sp[[SLOT]] ; CHECK-O0: bl {{.*}}free diff --git a/llvm/test/CodeGen/ARM/thumb-big-stack.ll b/llvm/test/CodeGen/ARM/thumb-big-stack.ll index 954c12634cff3..e5cbb9747a7e8 100644 --- a/llvm/test/CodeGen/ARM/thumb-big-stack.ll +++ b/llvm/test/CodeGen/ARM/thumb-big-stack.ll @@ -12,7 +12,7 @@ target triple = "thumbv7s-apple-ios" ; CHECK: movw [[ADDR:(r[0-9]+|lr)]], # ; CHECK-NEXT: add [[ADDR]], sp ; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, {{\[}}[[ADDR]]:128] -define <4 x float> @f(<4 x float> %x, float %val) { +define <4 x float> @f(<4 x float> %x) { entry: %.compoundliteral7837 = alloca <4 x float>, align 16 %.compoundliteral7865 = alloca <4 x float>, align 16 @@ -143,9 +143,9 @@ entry: %.compoundliteral13969 = alloca <4 x float>, align 16 %.compoundliteral13983 = alloca <4 x float>, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -153,17 +153,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add68 = fadd <4 x float> %tmp1, %tmp tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add68, <4 x float>* undef, align 16 + store <4 x float> %add68, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp2 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add76 = fadd float %val, 0x4074C999A0000000 + %add76 = fadd float undef, 0x4074C999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp3 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins77 = insertelement <4 x float> %tmp3, float %add76, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins77, <4 x float>* undef, align 16 + store <4 x float> %vecins77, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp4 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -175,15 +175,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins80 = insertelement <4 x float> %tmp5, float %add79, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins80, <4 x float>* undef, align 16 + store <4 x float> %vecins80, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp6 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add82 = fadd <4 x float> undef, %tmp6 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add82, <4 x float>* undef, align 16 + store <4 x float> %add82, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp7 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -195,19 +195,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins85 = insertelement <4 x float> %tmp8, float %add84, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins85, <4 x float>* undef, align 16 + store <4 x float> %vecins85, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp9 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext86 = extractelement <4 x float> %tmp9, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add93 = fadd float %val, 0xC076C66660000000 + %add93 = fadd float undef, 0xC076C66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp10 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins94 = insertelement <4 x float> %tmp10, float %add93, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp11 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -223,17 +223,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp14 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins102 = insertelement <4 x float> undef, float %val, i32 1 + %vecins102 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins102, <4 x float>* undef, align 16 + store <4 x float> %vecins102, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp15 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add104 = fadd float %val, 0x406AB999A0000000 + %add104 = fadd float undef, 0x406AB999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp16 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext579 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -243,7 +243,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins581 = insertelement <4 x float> %tmp17, float %add580, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins581, <4 x float>* undef, align 16 + store <4 x float> %vecins581, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp18 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -251,7 +251,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add583 = fadd float %vecext582, 0x40444CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp19 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -261,25 +261,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins592 = insertelement <4 x float> undef, float %add591, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins592, <4 x float>* undef, align 16 + store <4 x float> %vecins592, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp20 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add594 = fadd float %val, 0xC05B466660000000 + %add594 = fadd float undef, 0xC05B466660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add605 = fadd float %val, 0x407164CCC0000000 + %add605 = fadd float undef, 0x407164CCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp21 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add616 = fadd float %val, 1.885000e+02 + %add616 = fadd float undef, 1.885000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp22 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp23 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins620 = insertelement <4 x float> undef, float %val, i32 1 + %vecins620 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins620, <4 x float>* undef, align 16 + store <4 x float> %vecins620, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext621 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -287,7 +287,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins623 = insertelement <4 x float> undef, float %add622, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins623, <4 x float>* undef, align 16 + store <4 x float> %vecins623, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp24 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -299,9 +299,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins626 = insertelement <4 x float> %tmp25, float %add625, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins626, <4 x float>* undef, align 16 + store <4 x float> %vecins626, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp26 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -309,7 +309,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add628 = fadd <4 x float> %tmp27, %tmp26 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add628, <4 x float>* undef, align 16 + store <4 x float> %add628, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp28 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -321,7 +321,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins631 = insertelement <4 x float> %tmp29, float %add630, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins631, <4 x float>* undef, align 16 + store <4 x float> %vecins631, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp30 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -333,7 +333,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins634 = insertelement <4 x float> %tmp31, float %add633, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins634, <4 x float>* undef, align 16 + store <4 x float> %vecins634, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp32 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -347,13 +347,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp35 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add658 = fadd float %val, 0xC04A4CCCC0000000 + %add658 = fadd float undef, 0xC04A4CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext663 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp36 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins665 = insertelement <4 x float> %tmp36, float %val, i32 2 + %vecins665 = insertelement <4 x float> %tmp36, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext694 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -363,31 +363,31 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins696 = insertelement <4 x float> %tmp37, float %add695, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins696, <4 x float>* undef, align 16 + store <4 x float> %vecins696, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp38 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext699 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add703 = fadd float %val, 0x4068F33340000000 + %add703 = fadd float undef, 0x4068F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins704 = insertelement <4 x float> undef, float %add703, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins704, <4 x float>* undef, align 16 + store <4 x float> %vecins704, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp39 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp40 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins710 = insertelement <4 x float> %tmp40, float %val, i32 3 + %vecins710 = insertelement <4 x float> %tmp40, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins710, <4 x float>* undef, align 16 + store <4 x float> %vecins710, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp41 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -395,7 +395,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add712 = fadd <4 x float> %tmp42, %tmp41 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add712, <4 x float>* undef, align 16 + store <4 x float> %add712, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp43 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -403,7 +403,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp44 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins715 = insertelement <4 x float> %tmp44, float %val, i32 0 + %vecins715 = insertelement <4 x float> %tmp44, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp45 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -415,19 +415,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins718 = insertelement <4 x float> %tmp46, float %add717, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins718, <4 x float>* undef, align 16 + store <4 x float> %vecins718, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp47 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext719 = extractelement <4 x float> %tmp47, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add723 = fadd float %val, 0xC06A6CCCC0000000 + %add723 = fadd float undef, 0xC06A6CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins724 = insertelement <4 x float> undef, float %add723, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add726 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext730 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -437,19 +437,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins732 = insertelement <4 x float> %tmp48, float %add731, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins732, <4 x float>* undef, align 16 + store <4 x float> %vecins732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp49 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext733 = extractelement <4 x float> %tmp49, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp50 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins738 = insertelement <4 x float> %tmp50, float %val, i32 3 + %vecins738 = insertelement <4 x float> %tmp50, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp51 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -465,7 +465,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins743 = insertelement <4 x float> %tmp53, float %add742, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins743, <4 x float>* undef, align 16 + store <4 x float> %vecins743, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp54 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -473,7 +473,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add754 = fadd <4 x float> %tmp55, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add754, <4 x float>* undef, align 16 + store <4 x float> %add754, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp56 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -485,7 +485,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins757 = insertelement <4 x float> %tmp57, float %add756, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add765 = fadd float %val, 0x405BA66660000000 + %add765 = fadd float undef, 0x405BA66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp58 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -501,11 +501,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins771 = insertelement <4 x float> %tmp60, float %add770, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins771, <4 x float>* undef, align 16 + store <4 x float> %vecins771, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp61 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add776 = fadd float %val, 0xC055F33340000000 + %add776 = fadd float undef, 0xC055F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins777 = insertelement <4 x float> undef, float %add776, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -515,7 +515,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add782 = fadd <4 x float> %tmp63, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add782, <4 x float>* undef, align 16 + store <4 x float> %add782, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp64 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -523,25 +523,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add784 = fadd float %vecext783, -3.455000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add796 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add796, <4 x float>* undef, align 16 + store <4 x float> %add796, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp65 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add801 = fadd float %val, 3.045000e+02 + %add801 = fadd float undef, 3.045000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp66 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins802 = insertelement <4 x float> %tmp66, float %add801, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins802, <4 x float>* undef, align 16 + store <4 x float> %vecins802, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext803 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp67 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -549,7 +549,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add810 = fadd <4 x float> undef, %tmp68 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add810, <4 x float>* undef, align 16 + store <4 x float> %add810, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp69 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -557,17 +557,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp70 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins813 = insertelement <4 x float> %tmp70, float %val, i32 0 + %vecins813 = insertelement <4 x float> %tmp70, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext817 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add818 = fadd float %vecext817, -4.830000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins822 = insertelement <4 x float> undef, float %val, i32 3 + %vecins822 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins822, <4 x float>* undef, align 16 + store <4 x float> %vecins822, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp71 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -577,17 +577,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add838 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add838, <4 x float>* undef, align 16 + store <4 x float> %add838, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp73 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext839 = extractelement <4 x float> %tmp73, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add849 = fadd float %val, 0xC07C266660000000 + %add849 = fadd float undef, 0xC07C266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp74 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -609,9 +609,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins861 = insertelement <4 x float> %tmp77, float %add860, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins889 = insertelement <4 x float> undef, float %val, i32 2 + %vecins889 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins889, <4 x float>* undef, align 16 + store <4 x float> %vecins889, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp78 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -623,9 +623,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins892 = insertelement <4 x float> %tmp79, float %add891, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins892, <4 x float>* undef, align 16 + store <4 x float> %vecins892, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp80 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -633,7 +633,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add894 = fadd <4 x float> %tmp81, %tmp80 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add894, <4 x float>* undef, align 16 + store <4 x float> %add894, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext895 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -659,7 +659,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins903 = insertelement <4 x float> %tmp84, float %add902, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins903, <4 x float>* undef, align 16 + store <4 x float> %vecins903, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext904 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -669,7 +669,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins906 = insertelement <4 x float> %tmp85, float %add905, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp86 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -677,13 +677,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add908 = fadd <4 x float> %tmp87, %tmp86 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add908, <4 x float>* undef, align 16 + store <4 x float> %add908, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp88 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp89 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp90 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -703,7 +703,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins917 = insertelement <4 x float> %tmp92, float %add916, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins917, <4 x float>* undef, align 16 + store <4 x float> %vecins917, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp93 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -715,17 +715,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins920 = insertelement <4 x float> %tmp94, float %add919, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins920, <4 x float>* undef, align 16 + store <4 x float> %vecins920, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp95 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins925 = insertelement <4 x float> %tmp95, float %val, i32 0 + %vecins925 = insertelement <4 x float> %tmp95, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins925, <4 x float>* undef, align 16 + store <4 x float> %vecins925, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp96 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add927 = fadd float %val, 0xC0501999A0000000 + %add927 = fadd float undef, 0xC0501999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp97 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -739,7 +739,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins931 = insertelement <4 x float> %tmp98, float %add930, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp99 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -747,11 +747,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext937 = extractelement <4 x float> %tmp100, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add941 = fadd float %val, -4.665000e+02 + %add941 = fadd float undef, -4.665000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins942 = insertelement <4 x float> undef, float %add941, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins942, <4 x float>* undef, align 16 + store <4 x float> %vecins942, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp101 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -763,29 +763,29 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins945 = insertelement <4 x float> %tmp102, float %add944, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins945, <4 x float>* undef, align 16 + store <4 x float> %vecins945, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp103 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add947 = fadd float %val, 0xC051933340000000 + %add947 = fadd float undef, 0xC051933340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp104 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins948 = insertelement <4 x float> %tmp104, float %add947, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins948, <4 x float>* undef, align 16 + store <4 x float> %vecins948, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp105 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add955 = fadd float %val, 0x4077F4CCC0000000 + %add955 = fadd float undef, 0x4077F4CCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp106 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins956 = insertelement <4 x float> %tmp106, float %add955, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins956, <4 x float>* undef, align 16 + store <4 x float> %vecins956, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext971 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -795,17 +795,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins973 = insertelement <4 x float> %tmp107, float %add972, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins973, <4 x float>* undef, align 16 + store <4 x float> %vecins973, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp108 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext974 = extractelement <4 x float> %tmp108, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins976 = insertelement <4 x float> undef, float %val, i32 3 + %vecins976 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins976, <4 x float>* undef, align 16 + store <4 x float> %vecins976, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp109 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -817,7 +817,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp112 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext982 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -825,7 +825,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins984 = insertelement <4 x float> undef, float %add983, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins984, <4 x float>* undef, align 16 + store <4 x float> %vecins984, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp113 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -837,25 +837,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins987 = insertelement <4 x float> %tmp114, float %add986, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins987, <4 x float>* undef, align 16 + store <4 x float> %vecins987, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp115 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp116 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins995 = insertelement <4 x float> %tmp116, float %val, i32 0 + %vecins995 = insertelement <4 x float> %tmp116, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins995, <4 x float>* undef, align 16 + store <4 x float> %vecins995, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp117 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add997 = fadd float %val, 0xC0798999A0000000 + %add997 = fadd float undef, 0xC0798999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp118 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins998 = insertelement <4 x float> %tmp118, float %add997, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins998, <4 x float>* undef, align 16 + store <4 x float> %vecins998, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp119 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -865,7 +865,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp120 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp121 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -879,13 +879,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1031 = fadd float %vecext1030, 2.010000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp123 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp124 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1085 = insertelement <4 x float> %tmp124, float %val, i32 2 + %vecins1085 = insertelement <4 x float> %tmp124, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp125 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -897,13 +897,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1088 = insertelement <4 x float> %tmp126, float %add1087, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1088, <4 x float>* undef, align 16 + store <4 x float> %vecins1088, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp127 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1090 = fadd <4 x float> undef, %tmp127 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp128 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -915,7 +915,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1096 = insertelement <4 x float> %tmp129, float %add1095, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1096, <4 x float>* undef, align 16 + store <4 x float> %vecins1096, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp130 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -927,7 +927,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1099 = insertelement <4 x float> %tmp131, float %add1098, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1099, <4 x float>* undef, align 16 + store <4 x float> %vecins1099, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp132 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -939,9 +939,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1102 = insertelement <4 x float> %tmp133, float %add1101, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1102, <4 x float>* undef, align 16 + store <4 x float> %vecins1102, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp134 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -961,9 +961,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp137 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1110 = insertelement <4 x float> %tmp137, float %val, i32 1 + %vecins1110 = insertelement <4 x float> %tmp137, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1110, <4 x float>* undef, align 16 + store <4 x float> %vecins1110, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp138 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -975,21 +975,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1113 = insertelement <4 x float> %tmp139, float %add1112, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1113, <4 x float>* undef, align 16 + store <4 x float> %vecins1113, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1115 = fadd float %val, 0x4072B33340000000 + %add1115 = fadd float undef, 0x4072B33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1116 = insertelement <4 x float> undef, float %add1115, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1116, <4 x float>* undef, align 16 + store <4 x float> %vecins1116, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp140 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1118 = fadd <4 x float> %tmp140, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1118, <4 x float>* undef, align 16 + store <4 x float> %add1118, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp141 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -999,7 +999,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1121 = insertelement <4 x float> undef, float %add1120, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1121, <4 x float>* undef, align 16 + store <4 x float> %vecins1121, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp142 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1013,9 +1013,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1125 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1127 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1127 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1127, <4 x float>* undef, align 16 + store <4 x float> %vecins1127, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp144 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1027,7 +1027,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1130 = insertelement <4 x float> %tmp145, float %add1129, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp146 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1045,7 +1045,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1135 = insertelement <4 x float> %tmp149, float %add1134, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1135, <4 x float>* undef, align 16 + store <4 x float> %vecins1135, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp150 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1053,13 +1053,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp151 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1138 = insertelement <4 x float> %tmp151, float %val, i32 1 + %vecins1138 = insertelement <4 x float> %tmp151, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1138, <4 x float>* undef, align 16 + store <4 x float> %vecins1138, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp152 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1140 = fadd float %val, 0x407AE999A0000000 + %add1140 = fadd float undef, 0x407AE999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp153 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1073,7 +1073,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1144 = insertelement <4 x float> %tmp154, float %add1143, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1144, <4 x float>* undef, align 16 + store <4 x float> %vecins1144, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp155 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1081,27 +1081,27 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1146 = fadd <4 x float> %tmp156, %tmp155 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1146, <4 x float>* undef, align 16 + store <4 x float> %add1146, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp157 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1148 = fadd float %val, 4.145000e+02 + %add1148 = fadd float undef, 4.145000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp158 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1158 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1158 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1158, <4 x float>* undef, align 16 + store <4 x float> %vecins1158, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1218 = fadd float %val, 0xC078733340000000 + %add1218 = fadd float undef, 0xC078733340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1219 = insertelement <4 x float> undef, float %add1218, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp159 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1113,7 +1113,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1281 = insertelement <4 x float> %tmp160, float %add1280, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1281, <4 x float>* undef, align 16 + store <4 x float> %vecins1281, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp161 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1125,7 +1125,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1284 = insertelement <4 x float> %tmp162, float %add1283, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1284, <4 x float>* undef, align 16 + store <4 x float> %vecins1284, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp163 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1133,27 +1133,27 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1286 = fadd <4 x float> %tmp164, %tmp163 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1286, <4 x float>* undef, align 16 + store <4 x float> %add1286, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp165 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1288 = fadd float %val, 0xC0731199A0000000 + %add1288 = fadd float undef, 0xC0731199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp166 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp167 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1444 = extractelement <4 x float> %tmp167, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1460 = insertelement <4 x float> undef, float %val, i32 1 + %vecins1460 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1460, <4 x float>* undef, align 16 + store <4 x float> %vecins1460, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp168 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1462 = fadd float %val, -1.670000e+02 + %add1462 = fadd float undef, -1.670000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1463 = insertelement <4 x float> undef, float %add1462, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1167,9 +1167,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1466 = insertelement <4 x float> %tmp170, float %add1465, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1466, <4 x float>* undef, align 16 + store <4 x float> %vecins1466, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp171 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1177,17 +1177,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1468 = fadd <4 x float> %tmp172, %tmp171 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1468, <4 x float>* undef, align 16 + store <4 x float> %add1468, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp173 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1470 = fadd float %val, 0x4033B33340000000 + %add1470 = fadd float undef, 0x4033B33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp174 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1471 = insertelement <4 x float> %tmp174, float %add1470, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1471, <4 x float>* undef, align 16 + store <4 x float> %vecins1471, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp175 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1205,9 +1205,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp178 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1477 = insertelement <4 x float> %tmp178, float %val, i32 2 + %vecins1477 = insertelement <4 x float> %tmp178, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1477, <4 x float>* undef, align 16 + store <4 x float> %vecins1477, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp179 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1219,15 +1219,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1480 = insertelement <4 x float> %tmp180, float %add1479, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1480, <4 x float>* undef, align 16 + store <4 x float> %vecins1480, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp181 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp182 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp183 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1241,9 +1241,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1486 = extractelement <4 x float> %tmp185, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1502 = insertelement <4 x float> undef, float %val, i32 1 + %vecins1502 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1502, <4 x float>* undef, align 16 + store <4 x float> %vecins1502, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1503 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1253,7 +1253,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1505 = insertelement <4 x float> %tmp186, float %add1504, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1505, <4 x float>* undef, align 16 + store <4 x float> %vecins1505, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp187 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1265,9 +1265,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1508 = insertelement <4 x float> %tmp188, float %add1507, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1508, <4 x float>* undef, align 16 + store <4 x float> %vecins1508, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp189 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1275,7 +1275,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1510 = fadd <4 x float> %tmp190, %tmp189 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1510, <4 x float>* undef, align 16 + store <4 x float> %add1510, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp191 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1289,13 +1289,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1656 = insertelement <4 x float> %tmp193, float %add1655, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1656, <4 x float>* undef, align 16 + store <4 x float> %vecins1656, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1658 = fadd float %val, 0x40709999A0000000 + %add1658 = fadd float undef, 0x40709999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp194 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1660 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1305,19 +1305,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1662 = insertelement <4 x float> %tmp195, float %add1661, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1662, <4 x float>* undef, align 16 + store <4 x float> %vecins1662, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1676 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1676 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp196 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1692 = fadd <4 x float> %tmp196, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1692, <4 x float>* undef, align 16 + store <4 x float> %add1692, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp197 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1329,7 +1329,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1695 = insertelement <4 x float> %tmp198, float %add1694, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1695, <4 x float>* undef, align 16 + store <4 x float> %vecins1695, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp199 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1341,7 +1341,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1698 = insertelement <4 x float> %tmp200, float %add1697, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1698, <4 x float>* undef, align 16 + store <4 x float> %vecins1698, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp201 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1349,15 +1349,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp202 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1701 = insertelement <4 x float> %tmp202, float %val, i32 2 + %vecins1701 = insertelement <4 x float> %tmp202, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1701, <4 x float>* undef, align 16 + store <4 x float> %vecins1701, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp203 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1704 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1704 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp204 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1365,9 +1365,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp206 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1709 = insertelement <4 x float> %tmp206, float %val, i32 0 + %vecins1709 = insertelement <4 x float> %tmp206, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1709, <4 x float>* undef, align 16 + store <4 x float> %vecins1709, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp207 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1375,11 +1375,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1714 = fadd float %vecext1713, 0xC0703199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1723 = insertelement <4 x float> undef, float %val, i32 0 + %vecins1723 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp208 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1730 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1389,9 +1389,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1732 = insertelement <4 x float> %tmp209, float %add1731, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1732, <4 x float>* undef, align 16 + store <4 x float> %vecins1732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp210 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1399,7 +1399,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp211 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1736 = fadd float %val, 0x407C3999A0000000 + %add1736 = fadd float undef, 0x407C3999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp212 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1415,7 +1415,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1740 = insertelement <4 x float> %tmp214, float %add1739, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1740, <4 x float>* undef, align 16 + store <4 x float> %vecins1740, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp215 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1427,25 +1427,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1743 = insertelement <4 x float> %tmp216, float %add1742, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1743, <4 x float>* undef, align 16 + store <4 x float> %vecins1743, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1744 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp217 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1746 = insertelement <4 x float> %tmp217, float %val, i32 3 + %vecins1746 = insertelement <4 x float> %tmp217, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp218 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1748 = fadd <4 x float> undef, %tmp218 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1748, <4 x float>* undef, align 16 + store <4 x float> %add1748, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp219 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1750 = fadd float %val, 0x407C6B3340000000 + %add1750 = fadd float undef, 0x407C6B3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1751 = insertelement <4 x float> undef, float %add1750, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1467,21 +1467,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp223 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1759 = fadd float %val, 0x40678999A0000000 + %add1759 = fadd float undef, 0x40678999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp224 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1760 = insertelement <4 x float> %tmp224, float %add1759, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1760, <4 x float>* undef, align 16 + store <4 x float> %vecins1760, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp225 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1762 = fadd <4 x float> undef, %tmp225 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1762, <4 x float>* undef, align 16 + store <4 x float> %add1762, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp226 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1493,7 +1493,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1765 = insertelement <4 x float> %tmp227, float %add1764, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1765, <4 x float>* undef, align 16 + store <4 x float> %vecins1765, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp228 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1505,7 +1505,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1768 = insertelement <4 x float> %tmp229, float %add1767, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1768, <4 x float>* undef, align 16 + store <4 x float> %vecins1768, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1769 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1515,7 +1515,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1771 = insertelement <4 x float> %tmp230, float %add1770, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1771, <4 x float>* undef, align 16 + store <4 x float> %vecins1771, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp231 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1525,13 +1525,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp234 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1779 = insertelement <4 x float> %tmp234, float %val, i32 0 + %vecins1779 = insertelement <4 x float> %tmp234, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1779, <4 x float>* undef, align 16 + store <4 x float> %vecins1779, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp235 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp236 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1541,9 +1541,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1785 = insertelement <4 x float> undef, float %add1784, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1785, <4 x float>* undef, align 16 + store <4 x float> %vecins1785, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp237 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1559,25 +1559,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1793 = insertelement <4 x float> %tmp239, float %add1792, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1793, <4 x float>* undef, align 16 + store <4 x float> %vecins1793, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp240 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1795 = fadd float %val, 0x4055266660000000 + %add1795 = fadd float undef, 0x4055266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp241 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1796 = insertelement <4 x float> %tmp241, float %add1795, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1799 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1799 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1800 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp242 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp243 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1587,7 +1587,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp246 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1865 = fadd float %val, -2.235000e+02 + %add1865 = fadd float undef, -2.235000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp247 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1597,33 +1597,33 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp249 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1872 = insertelement <4 x float> %tmp249, float %val, i32 3 + %vecins1872 = insertelement <4 x float> %tmp249, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp250 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1874 = fadd <4 x float> %tmp250, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1874, <4 x float>* undef, align 16 + store <4 x float> %add1874, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1875 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp251 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1894 = insertelement <4 x float> %tmp251, float %val, i32 1 + %vecins1894 = insertelement <4 x float> %tmp251, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp252 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1895 = extractelement <4 x float> %tmp252, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1900 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1900 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1900, <4 x float>* undef, align 16 + store <4 x float> %vecins1900, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1905 = insertelement <4 x float> undef, float %val, i32 0 + %vecins1905 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1905, <4 x float>* undef, align 16 + store <4 x float> %vecins1905, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp253 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1633,7 +1633,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1908 = insertelement <4 x float> undef, float %add1907, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1908, <4 x float>* undef, align 16 + store <4 x float> %vecins1908, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1909 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1649,23 +1649,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1916 = fadd <4 x float> %tmp256, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1916, <4 x float>* undef, align 16 + store <4 x float> %add1916, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1923 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp257 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1927 = fadd float %val, 0x40761999A0000000 + %add1927 = fadd float undef, 0x40761999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp258 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1928 = insertelement <4 x float> %tmp258, float %add1927, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1928, <4 x float>* undef, align 16 + store <4 x float> %vecins1928, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp259 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1677,9 +1677,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp262 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1933 = insertelement <4 x float> %tmp262, float %val, i32 0 + %vecins1933 = insertelement <4 x float> %tmp262, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1933, <4 x float>* undef, align 16 + store <4 x float> %vecins1933, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp263 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1693,15 +1693,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1940 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1942 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1942 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp265 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp266 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp267 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1709,13 +1709,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1946 = fadd float %vecext1945, 0xC074866660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1953 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1953 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1953, <4 x float>* undef, align 16 + store <4 x float> %vecins1953, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp268 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp269 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1737,15 +1737,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1964 = insertelement <4 x float> %tmp272, float %add1963, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1964, <4 x float>* undef, align 16 + store <4 x float> %vecins1964, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1965 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp273 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1967 = insertelement <4 x float> %tmp273, float %val, i32 2 + %vecins1967 = insertelement <4 x float> %tmp273, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1967, <4 x float>* undef, align 16 + store <4 x float> %vecins1967, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp274 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1757,9 +1757,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1970 = insertelement <4 x float> %tmp275, float %add1969, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1970, <4 x float>* undef, align 16 + store <4 x float> %vecins1970, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp276 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1767,31 +1767,31 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp278 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1975 = insertelement <4 x float> %tmp278, float %val, i32 0 + %vecins1975 = insertelement <4 x float> %tmp278, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1975, <4 x float>* undef, align 16 + store <4 x float> %vecins1975, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp279 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1976 = extractelement <4 x float> %tmp279, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1978 = insertelement <4 x float> undef, float %val, i32 1 + %vecins1978 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1978, <4 x float>* undef, align 16 + store <4 x float> %vecins1978, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1979 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1981 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1981 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1981, <4 x float>* undef, align 16 + store <4 x float> %vecins1981, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1984 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1984 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1984, <4 x float>* undef, align 16 + store <4 x float> %vecins1984, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1990 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1803,11 +1803,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1998 = insertelement <4 x float> %tmp280, float %add1997, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1998, <4 x float>* undef, align 16 + store <4 x float> %vecins1998, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2004 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1817,7 +1817,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2006 = insertelement <4 x float> %tmp281, float %add2005, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2006, <4 x float>* undef, align 16 + store <4 x float> %vecins2006, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp282 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1825,7 +1825,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp283 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2009 = insertelement <4 x float> %tmp283, float %val, i32 2 + %vecins2009 = insertelement <4 x float> %tmp283, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp284 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1837,15 +1837,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2012 = insertelement <4 x float> %tmp285, float %add2011, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2012, <4 x float>* undef, align 16 + store <4 x float> %vecins2012, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp286 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp287 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp288 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1857,7 +1857,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2017 = insertelement <4 x float> %tmp289, float %add2016, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2022 = fadd float %val, 8.350000e+01 + %add2022 = fadd float undef, 8.350000e+01 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp290 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1871,7 +1871,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2028 = fadd <4 x float> %tmp292, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2028, <4 x float>* undef, align 16 + store <4 x float> %add2028, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2029 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1879,11 +1879,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp293 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp294 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2036 = fadd float %val, 0x407DE66660000000 + %add2036 = fadd float undef, 0x407DE66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp295 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1895,9 +1895,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp299 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2045 = insertelement <4 x float> %tmp299, float %val, i32 0 + %vecins2045 = insertelement <4 x float> %tmp299, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2045, <4 x float>* undef, align 16 + store <4 x float> %vecins2045, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp300 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1905,35 +1905,35 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2047 = fadd float %vecext2046, 0xC065433340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2052 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp301 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2054 = insertelement <4 x float> %tmp301, float %val, i32 3 + %vecins2054 = insertelement <4 x float> %tmp301, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2054, <4 x float>* undef, align 16 + store <4 x float> %vecins2054, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp302 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2056 = fadd <4 x float> undef, %tmp302 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2056, <4 x float>* undef, align 16 + store <4 x float> %add2056, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp303 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp304 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2062 = insertelement <4 x float> %tmp304, float %val, i32 1 + %vecins2062 = insertelement <4 x float> %tmp304, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2062, <4 x float>* undef, align 16 + store <4 x float> %vecins2062, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp305 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp306 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1943,9 +1943,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2068 = insertelement <4 x float> undef, float %add2067, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2068, <4 x float>* undef, align 16 + store <4 x float> %vecins2068, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp307 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1953,7 +1953,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2070 = fadd <4 x float> %tmp308, %tmp307 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2070, <4 x float>* undef, align 16 + store <4 x float> %add2070, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp309 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1965,7 +1965,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2073 = insertelement <4 x float> %tmp310, float %add2072, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2073, <4 x float>* undef, align 16 + store <4 x float> %vecins2073, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp311 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1973,7 +1973,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp312 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2076 = insertelement <4 x float> %tmp312, float %val, i32 1 + %vecins2076 = insertelement <4 x float> %tmp312, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp313 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1985,7 +1985,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2079 = insertelement <4 x float> %tmp314, float %add2078, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2079, <4 x float>* undef, align 16 + store <4 x float> %vecins2079, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp315 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1997,15 +1997,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2082 = insertelement <4 x float> %tmp316, float %add2081, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2082, <4 x float>* undef, align 16 + store <4 x float> %vecins2082, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp317 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp318 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp319 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2015,7 +2015,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2087 = insertelement <4 x float> undef, float %add2086, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2087, <4 x float>* undef, align 16 + store <4 x float> %vecins2087, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2480 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2029,23 +2029,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2485 = insertelement <4 x float> %tmp320, float %add2484, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2485, <4 x float>* undef, align 16 + store <4 x float> %vecins2485, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp321 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2487 = fadd float %val, 2.030000e+02 + %add2487 = fadd float undef, 2.030000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp322 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2491 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp323 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp324 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2055,9 +2055,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp325 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2499 = insertelement <4 x float> undef, float %val, i32 2 + %vecins2499 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2499, <4 x float>* undef, align 16 + store <4 x float> %vecins2499, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2500 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2079,7 +2079,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp329 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2534 = fadd float %val, 0x4072C66660000000 + %add2534 = fadd float undef, 0x4072C66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2536 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2089,15 +2089,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2538 = insertelement <4 x float> %tmp330, float %add2537, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2538, <4 x float>* undef, align 16 + store <4 x float> %vecins2538, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2539 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2540 = fadd float %vecext2539, 0x406F9999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2580 = insertelement <4 x float> undef, float %val, i32 1 + %vecins2580 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2580, <4 x float>* undef, align 16 + store <4 x float> %vecins2580, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp331 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2107,7 +2107,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2583 = insertelement <4 x float> undef, float %add2582, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2583, <4 x float>* undef, align 16 + store <4 x float> %vecins2583, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2584 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2115,21 +2115,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp332 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2590 = fadd float %val, 0x407B1999A0000000 + %add2590 = fadd float undef, 0x407B1999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp333 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp334 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2672 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2672, <4 x float>* undef, align 16 + store <4 x float> %add2672, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp335 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2141,37 +2141,37 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2678 = insertelement <4 x float> %tmp336, float %add2677, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2678, <4 x float>* undef, align 16 + store <4 x float> %vecins2678, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp337 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2679 = extractelement <4 x float> %tmp337, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2681 = insertelement <4 x float> undef, float %val, i32 2 + %vecins2681 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2681, <4 x float>* undef, align 16 + store <4 x float> %vecins2681, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp338 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2682 = extractelement <4 x float> %tmp338, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2684 = insertelement <4 x float> undef, float %val, i32 3 + %vecins2684 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp339 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp340 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp341 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2688 = fadd float %val, 0x4063266660000000 + %add2688 = fadd float undef, 0x4063266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2692 = insertelement <4 x float> undef, float %val, i32 1 + %vecins2692 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2692, <4 x float>* undef, align 16 + store <4 x float> %vecins2692, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp342 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2183,9 +2183,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2698 = insertelement <4 x float> %tmp343, float %add2697, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2698, <4 x float>* undef, align 16 + store <4 x float> %vecins2698, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp344 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2193,7 +2193,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2700 = fadd <4 x float> %tmp345, %tmp344 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2700, <4 x float>* undef, align 16 + store <4 x float> %add2700, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp346 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2207,15 +2207,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp349 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3121 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3125 = fadd float %val, 0xC06F266660000000 + %add3125 = fadd float undef, 0xC06F266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3126 = insertelement <4 x float> undef, float %add3125, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3126, <4 x float>* undef, align 16 + store <4 x float> %vecins3126, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp350 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2227,11 +2227,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3129 = insertelement <4 x float> %tmp351, float %add3128, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3129, <4 x float>* undef, align 16 + store <4 x float> %vecins3129, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp352 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3131 = fadd float %val, 3.215000e+02 + %add3131 = fadd float undef, 3.215000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp353 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2239,15 +2239,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3134 = fadd <4 x float> %tmp354, %tmp353 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3134, <4 x float>* undef, align 16 + store <4 x float> %add3134, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp355 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3136 = fadd float %val, 0x4074333340000000 + %add3136 = fadd float undef, 0x4074333340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3140 = insertelement <4 x float> undef, float %val, i32 1 + %vecins3140 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3140, <4 x float>* undef, align 16 + store <4 x float> %vecins3140, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp356 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2259,7 +2259,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3143 = insertelement <4 x float> %tmp357, float %add3142, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3143, <4 x float>* undef, align 16 + store <4 x float> %vecins3143, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp358 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2271,15 +2271,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3146 = insertelement <4 x float> %tmp359, float %add3145, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3146, <4 x float>* undef, align 16 + store <4 x float> %vecins3146, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp360 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3272 = insertelement <4 x float> undef, float %val, i32 3 + %vecins3272 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3272, <4 x float>* undef, align 16 + store <4 x float> %vecins3272, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp361 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2287,7 +2287,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3274 = fadd <4 x float> %tmp362, %tmp361 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3274, <4 x float>* undef, align 16 + store <4 x float> %add3274, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp363 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2299,7 +2299,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3277 = insertelement <4 x float> %tmp364, float %add3276, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3277, <4 x float>* undef, align 16 + store <4 x float> %vecins3277, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp365 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2309,7 +2309,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3280 = insertelement <4 x float> undef, float %add3279, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3280, <4 x float>* undef, align 16 + store <4 x float> %vecins3280, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp366 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2321,7 +2321,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3283 = insertelement <4 x float> %tmp367, float %add3282, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3283, <4 x float>* undef, align 16 + store <4 x float> %vecins3283, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp368 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2333,7 +2333,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp369 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp370 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2345,7 +2345,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3291 = insertelement <4 x float> %tmp371, float %add3290, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3291, <4 x float>* undef, align 16 + store <4 x float> %vecins3291, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3292 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2353,11 +2353,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp373 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3328 = insertelement <4 x float> %tmp373, float %val, i32 3 + %vecins3328 = insertelement <4 x float> %tmp373, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3330 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3330, <4 x float>* undef, align 16 + store <4 x float> %add3330, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3331 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2367,7 +2367,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3333 = insertelement <4 x float> %tmp374, float %add3332, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3333, <4 x float>* undef, align 16 + store <4 x float> %vecins3333, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3334 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2385,7 +2385,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3339 = insertelement <4 x float> %tmp376, float %add3338, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3339, <4 x float>* undef, align 16 + store <4 x float> %vecins3339, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp377 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2393,13 +2393,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp378 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3342 = insertelement <4 x float> %tmp378, float %val, i32 3 + %vecins3342 = insertelement <4 x float> %tmp378, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp379 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3344 = fadd <4 x float> %tmp379, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3344, <4 x float>* undef, align 16 + store <4 x float> %add3344, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp380 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2419,15 +2419,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3350 = insertelement <4 x float> %tmp382, float %add3349, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3350, <4 x float>* undef, align 16 + store <4 x float> %vecins3350, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3352 = fadd float %val, 0xC06ACCCCC0000000 + %add3352 = fadd float undef, 0xC06ACCCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp383 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3423 = insertelement <4 x float> undef, float %val, i32 2 + %vecins3423 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3423, <4 x float>* undef, align 16 + store <4 x float> %vecins3423, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3424 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2437,9 +2437,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3426 = insertelement <4 x float> %tmp384, float %add3425, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3426, <4 x float>* undef, align 16 + store <4 x float> %vecins3426, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp385 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2457,7 +2457,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3431 = insertelement <4 x float> %tmp388, float %add3430, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3431, <4 x float>* undef, align 16 + store <4 x float> %vecins3431, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp389 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2469,15 +2469,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3434 = insertelement <4 x float> %tmp390, float %add3433, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3434, <4 x float>* undef, align 16 + store <4 x float> %vecins3434, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3435 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp391 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3437 = insertelement <4 x float> %tmp391, float %val, i32 2 + %vecins3437 = insertelement <4 x float> %tmp391, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3437, <4 x float>* undef, align 16 + store <4 x float> %vecins3437, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp392 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2485,7 +2485,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3439 = fadd float %vecext3438, 0xC071D999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp393 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2493,7 +2493,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3442 = fadd <4 x float> %tmp394, %tmp393 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3442, <4 x float>* undef, align 16 + store <4 x float> %add3442, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3443 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2509,7 +2509,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3448 = insertelement <4 x float> %tmp396, float %add3447, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3448, <4 x float>* undef, align 16 + store <4 x float> %vecins3448, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp397 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2521,15 +2521,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3451 = insertelement <4 x float> %tmp398, float %add3450, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3451, <4 x float>* undef, align 16 + store <4 x float> %vecins3451, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3453 = fadd float %val, 0xC07ADCCCC0000000 + %add3453 = fadd float undef, 0xC07ADCCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp399 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3454 = insertelement <4 x float> %tmp399, float %add3453, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3454, <4 x float>* undef, align 16 + store <4 x float> %vecins3454, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp400 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2539,7 +2539,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3459 = insertelement <4 x float> undef, float %add3458, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3459, <4 x float>* undef, align 16 + store <4 x float> %vecins3459, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp401 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2547,19 +2547,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp402 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3462 = insertelement <4 x float> %tmp402, float %val, i32 1 + %vecins3462 = insertelement <4 x float> %tmp402, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3462, <4 x float>* undef, align 16 + store <4 x float> %vecins3462, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp403 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3464 = fadd float %val, 0xC057B999A0000000 + %add3464 = fadd float undef, 0xC057B999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp404 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3465 = insertelement <4 x float> %tmp404, float %add3464, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3465, <4 x float>* undef, align 16 + store <4 x float> %vecins3465, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp405 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2569,21 +2569,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp406 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp407 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp408 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3477 = extractelement <4 x float> %tmp408, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3479 = insertelement <4 x float> undef, float %val, i32 2 + %vecins3479 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3479, <4 x float>* undef, align 16 + store <4 x float> %vecins3479, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3480 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2593,23 +2593,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3482 = insertelement <4 x float> %tmp409, float %add3481, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3482, <4 x float>* undef, align 16 + store <4 x float> %vecins3482, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp410 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3484 = fadd <4 x float> %tmp410, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3484, <4 x float>* undef, align 16 + store <4 x float> %add3484, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp411 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3486 = fadd float %val, -1.415000e+02 + %add3486 = fadd float undef, -1.415000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3487 = insertelement <4 x float> undef, float %add3486, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3487, <4 x float>* undef, align 16 + store <4 x float> %vecins3487, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp412 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2621,25 +2621,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3490 = insertelement <4 x float> %tmp413, float %add3489, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3490, <4 x float>* undef, align 16 + store <4 x float> %vecins3490, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3492 = fadd float %val, 0x4078066660000000 + %add3492 = fadd float undef, 0x4078066660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp414 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3493 = insertelement <4 x float> %tmp414, float %add3492, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3493, <4 x float>* undef, align 16 + store <4 x float> %vecins3493, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp415 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3495 = fadd float %val, 0xC0798999A0000000 + %add3495 = fadd float undef, 0xC0798999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp416 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3496 = insertelement <4 x float> %tmp416, float %add3495, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3496, <4 x float>* undef, align 16 + store <4 x float> %vecins3496, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp417 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2647,7 +2647,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3498 = fadd <4 x float> %tmp418, %tmp417 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3498, <4 x float>* undef, align 16 + store <4 x float> %add3498, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3499 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2663,25 +2663,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp420 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3506 = fadd float %val, 0xC074DB3340000000 + %add3506 = fadd float undef, 0xC074DB3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp421 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3507 = insertelement <4 x float> %tmp421, float %add3506, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3507, <4 x float>* undef, align 16 + store <4 x float> %vecins3507, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3509 = fadd float %val, 0xC066033340000000 + %add3509 = fadd float undef, 0xC066033340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp422 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp423 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3513 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2693,9 +2693,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3516 = extractelement <4 x float> %tmp425, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5414 = insertelement <4 x float> undef, float %val, i32 3 + %vecins5414 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5414, <4 x float>* undef, align 16 + store <4 x float> %vecins5414, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp426 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2703,33 +2703,33 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5416 = fadd <4 x float> %tmp427, %tmp426 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5416, <4 x float>* undef, align 16 + store <4 x float> %add5416, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp428 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5418 = fadd float %val, 0xC07ED999A0000000 + %add5418 = fadd float undef, 0xC07ED999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp429 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5419 = insertelement <4 x float> %tmp429, float %add5418, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5624 = insertelement <4 x float> undef, float %val, i32 3 + %vecins5624 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5624, <4 x float>* undef, align 16 + store <4 x float> %vecins5624, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5626 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5626, <4 x float>* undef, align 16 + store <4 x float> %add5626, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5627 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp430 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5629 = insertelement <4 x float> %tmp430, float %val, i32 0 + %vecins5629 = insertelement <4 x float> %tmp430, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5629, <4 x float>* undef, align 16 + store <4 x float> %vecins5629, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp431 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2739,13 +2739,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5632 = insertelement <4 x float> undef, float %add5631, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5632, <4 x float>* undef, align 16 + store <4 x float> %vecins5632, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp432 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5688 = insertelement <4 x float> %tmp432, float %val, i32 1 + %vecins5688 = insertelement <4 x float> %tmp432, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5688, <4 x float>* undef, align 16 + store <4 x float> %vecins5688, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp433 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2753,35 +2753,35 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp434 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5691 = insertelement <4 x float> %tmp434, float %val, i32 2 + %vecins5691 = insertelement <4 x float> %tmp434, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5691, <4 x float>* undef, align 16 + store <4 x float> %vecins5691, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5692 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp435 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5696 = fadd <4 x float> undef, %tmp435 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5696, <4 x float>* undef, align 16 + store <4 x float> %add5696, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5701 = fadd float %val, 0x4077D4CCC0000000 + %add5701 = fadd float undef, 0x4077D4CCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp436 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5702 = insertelement <4 x float> %tmp436, float %add5701, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5702, <4 x float>* undef, align 16 + store <4 x float> %vecins5702, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp437 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp438 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5705 = insertelement <4 x float> %tmp438, float %val, i32 2 + %vecins5705 = insertelement <4 x float> %tmp438, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5705, <4 x float>* undef, align 16 + store <4 x float> %vecins5705, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp439 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2793,9 +2793,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5708 = insertelement <4 x float> %tmp440, float %add5707, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5708, <4 x float>* undef, align 16 + store <4 x float> %vecins5708, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp441 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2803,7 +2803,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5710 = fadd <4 x float> %tmp442, %tmp441 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5710, <4 x float>* undef, align 16 + store <4 x float> %add5710, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp443 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2815,19 +2815,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5713 = insertelement <4 x float> %tmp444, float %add5712, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5713, <4 x float>* undef, align 16 + store <4 x float> %vecins5713, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp445 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp446 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5716 = insertelement <4 x float> %tmp446, float %val, i32 1 + %vecins5716 = insertelement <4 x float> %tmp446, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp447 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5724 = fadd <4 x float> %tmp447, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5724, <4 x float>* undef, align 16 + store <4 x float> %add5724, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp448 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2835,21 +2835,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp449 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5750 = insertelement <4 x float> %tmp449, float %val, i32 3 + %vecins5750 = insertelement <4 x float> %tmp449, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp450 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5752 = fadd <4 x float> undef, %tmp450 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5754 = fadd float %val, 0xC064033340000000 + %add5754 = fadd float undef, 0xC064033340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp451 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5755 = insertelement <4 x float> %tmp451, float %add5754, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5755, <4 x float>* undef, align 16 + store <4 x float> %vecins5755, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp452 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2861,7 +2861,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5758 = insertelement <4 x float> %tmp453, float %add5757, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5758, <4 x float>* undef, align 16 + store <4 x float> %vecins5758, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp454 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2869,9 +2869,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp455 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5761 = insertelement <4 x float> %tmp455, float %val, i32 2 + %vecins5761 = insertelement <4 x float> %tmp455, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5761, <4 x float>* undef, align 16 + store <4 x float> %vecins5761, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp456 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2883,13 +2883,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5764 = insertelement <4 x float> %tmp457, float %add5763, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5764, <4 x float>* undef, align 16 + store <4 x float> %vecins5764, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5766 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5766, <4 x float>* undef, align 16 + store <4 x float> %add5766, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp458 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2901,9 +2901,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5769 = insertelement <4 x float> %tmp459, float %add5768, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5769, <4 x float>* undef, align 16 + store <4 x float> %vecins5769, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5771 = fadd float %val, 8.000000e+00 + %add5771 = fadd float undef, 8.000000e+00 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp460 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2911,11 +2911,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp461 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5796 = fadd float %val, 0x4058ECCCC0000000 + %add5796 = fadd float undef, 0x4058ECCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5797 = insertelement <4 x float> undef, float %add5796, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5797, <4 x float>* undef, align 16 + store <4 x float> %vecins5797, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp462 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2923,7 +2923,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp463 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5800 = insertelement <4 x float> %tmp463, float %val, i32 1 + %vecins5800 = insertelement <4 x float> %tmp463, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp464 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2935,7 +2935,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5803 = insertelement <4 x float> %tmp465, float %add5802, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5803, <4 x float>* undef, align 16 + store <4 x float> %vecins5803, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp466 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2947,11 +2947,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5806 = insertelement <4 x float> %tmp467, float %add5805, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5806, <4 x float>* undef, align 16 + store <4 x float> %vecins5806, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp468 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp469 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2961,7 +2961,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp470 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp471 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2973,9 +2973,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5820 = insertelement <4 x float> %tmp472, float %add5819, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5820, <4 x float>* undef, align 16 + store <4 x float> %vecins5820, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp473 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2983,7 +2983,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5822 = fadd <4 x float> %tmp474, %tmp473 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5822, <4 x float>* undef, align 16 + store <4 x float> %add5822, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp475 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2991,7 +2991,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp476 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5825 = insertelement <4 x float> %tmp476, float %val, i32 0 + %vecins5825 = insertelement <4 x float> %tmp476, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp477 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3003,7 +3003,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5828 = insertelement <4 x float> %tmp478, float %add5827, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5828, <4 x float>* undef, align 16 + store <4 x float> %vecins5828, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp479 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3015,19 +3015,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5831 = insertelement <4 x float> %tmp480, float %add5830, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp481 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5837 = extractelement <4 x float> %tmp481, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5839 = insertelement <4 x float> undef, float %val, i32 0 + %vecins5839 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5839, <4 x float>* undef, align 16 + store <4 x float> %vecins5839, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp482 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3035,33 +3035,33 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp483 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5842 = insertelement <4 x float> %tmp483, float %val, i32 1 + %vecins5842 = insertelement <4 x float> %tmp483, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5842, <4 x float>* undef, align 16 + store <4 x float> %vecins5842, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp484 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp485 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5845 = insertelement <4 x float> %tmp485, float %val, i32 2 + %vecins5845 = insertelement <4 x float> %tmp485, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5845, <4 x float>* undef, align 16 + store <4 x float> %vecins5845, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5850 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5850, <4 x float>* undef, align 16 + store <4 x float> %add5850, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp486 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5852 = fadd float %val, 2.985000e+02 + %add5852 = fadd float undef, 2.985000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp487 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5853 = insertelement <4 x float> %tmp487, float %add5852, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5853, <4 x float>* undef, align 16 + store <4 x float> %vecins5853, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp488 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3073,17 +3073,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5856 = insertelement <4 x float> %tmp489, float %add5855, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5856, <4 x float>* undef, align 16 + store <4 x float> %vecins5856, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp490 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5858 = fadd float %val, 0x4071666660000000 + %add5858 = fadd float undef, 0x4071666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp491 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5859 = insertelement <4 x float> %tmp491, float %add5858, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5859, <4 x float>* undef, align 16 + store <4 x float> %vecins5859, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp492 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3099,19 +3099,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5901 = insertelement <4 x float> %tmp494, float %add5900, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5901, <4 x float>* undef, align 16 + store <4 x float> %vecins5901, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5914 = fadd float %val, 0x40786E6660000000 + %add5914 = fadd float undef, 0x40786E6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5918 = insertelement <4 x float> undef, float %val, i32 3 + %vecins5918 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5918, <4 x float>* undef, align 16 + store <4 x float> %vecins5918, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5920 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5920, <4 x float>* undef, align 16 + store <4 x float> %add5920, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5934 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3121,7 +3121,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp495 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp496 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3131,13 +3131,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5996 = insertelement <4 x float> undef, float %add5995, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5996, <4 x float>* undef, align 16 + store <4 x float> %vecins5996, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp497 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5997 = extractelement <4 x float> %tmp497, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp498 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3149,15 +3149,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6002 = insertelement <4 x float> %tmp499, float %add6001, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6002, <4 x float>* undef, align 16 + store <4 x float> %vecins6002, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp500 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6004 = fadd <4 x float> undef, %tmp500 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6004, <4 x float>* undef, align 16 + store <4 x float> %add6004, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp501 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3165,7 +3165,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp502 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6007 = insertelement <4 x float> %tmp502, float %val, i32 0 + %vecins6007 = insertelement <4 x float> %tmp502, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp503 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3173,9 +3173,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp504 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6024 = insertelement <4 x float> %tmp504, float %val, i32 1 + %vecins6024 = insertelement <4 x float> %tmp504, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6024, <4 x float>* undef, align 16 + store <4 x float> %vecins6024, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp505 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3187,7 +3187,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6027 = insertelement <4 x float> %tmp506, float %add6026, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6027, <4 x float>* undef, align 16 + store <4 x float> %vecins6027, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6028 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3197,15 +3197,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6030 = insertelement <4 x float> %tmp507, float %add6029, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6030, <4 x float>* undef, align 16 + store <4 x float> %vecins6030, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp508 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp509 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp510 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3213,7 +3213,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp511 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6036 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3221,17 +3221,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6038 = insertelement <4 x float> undef, float %add6037, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6038, <4 x float>* undef, align 16 + store <4 x float> %vecins6038, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp512 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6040 = fadd float %val, 0x4071ECCCC0000000 + %add6040 = fadd float undef, 0x4071ECCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp513 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6041 = insertelement <4 x float> %tmp513, float %add6040, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6041, <4 x float>* undef, align 16 + store <4 x float> %vecins6041, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp514 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3243,9 +3243,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6044 = insertelement <4 x float> %tmp515, float %add6043, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6044, <4 x float>* undef, align 16 + store <4 x float> %vecins6044, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp516 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3253,15 +3253,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6046 = fadd <4 x float> %tmp517, %tmp516 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6046, <4 x float>* undef, align 16 + store <4 x float> %add6046, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6047 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp518 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6049 = insertelement <4 x float> %tmp518, float %val, i32 0 + %vecins6049 = insertelement <4 x float> %tmp518, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6049, <4 x float>* undef, align 16 + store <4 x float> %vecins6049, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp519 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3269,19 +3269,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6051 = fadd float %vecext6050, 0x407E4E6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6055 = insertelement <4 x float> undef, float %val, i32 2 + %vecins6055 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6056 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp520 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6061 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp521 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp522 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3295,9 +3295,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6072 = insertelement <4 x float> undef, float %add6071, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6072, <4 x float>* undef, align 16 + store <4 x float> %vecins6072, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp523 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3305,7 +3305,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6074 = fadd <4 x float> %tmp524, %tmp523 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6074, <4 x float>* undef, align 16 + store <4 x float> %add6074, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp525 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3317,23 +3317,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6077 = insertelement <4 x float> %tmp526, float %add6076, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6077, <4 x float>* undef, align 16 + store <4 x float> %vecins6077, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp527 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6079 = fadd float %val, 0xC07E9B3340000000 + %add6079 = fadd float undef, 0xC07E9B3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp528 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp529 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6082 = fadd float %val, 0x407DCE6660000000 + %add6082 = fadd float undef, 0x407DCE6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6083 = insertelement <4 x float> undef, float %add6082, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6083, <4 x float>* undef, align 16 + store <4 x float> %vecins6083, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp530 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3343,9 +3343,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6086 = insertelement <4 x float> undef, float %add6085, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6086, <4 x float>* undef, align 16 + store <4 x float> %vecins6086, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp531 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3353,19 +3353,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6088 = fadd <4 x float> %tmp532, %tmp531 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6088, <4 x float>* undef, align 16 + store <4 x float> %add6088, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp533 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6089 = extractelement <4 x float> %tmp533, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6107 = fadd float %val, 0xC06A166660000000 + %add6107 = fadd float undef, 0xC06A166660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp534 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6108 = insertelement <4 x float> %tmp534, float %add6107, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6108, <4 x float>* undef, align 16 + store <4 x float> %vecins6108, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp535 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3375,7 +3375,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp536 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp537 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3395,7 +3395,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6119 = insertelement <4 x float> %tmp540, float %add6118, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6119, <4 x float>* undef, align 16 + store <4 x float> %vecins6119, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp541 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3407,7 +3407,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6122 = insertelement <4 x float> %tmp542, float %add6121, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6122, <4 x float>* undef, align 16 + store <4 x float> %vecins6122, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6123 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3415,17 +3415,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp543 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6126 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp544 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6128 = insertelement <4 x float> %tmp544, float %val, i32 3 + %vecins6128 = insertelement <4 x float> %tmp544, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6128, <4 x float>* undef, align 16 + store <4 x float> %vecins6128, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp545 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3441,7 +3441,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6133 = insertelement <4 x float> undef, float %add6132, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6133, <4 x float>* undef, align 16 + store <4 x float> %vecins6133, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6134 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3463,9 +3463,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp551 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6178 = insertelement <4 x float> %tmp551, float %val, i32 1 + %vecins6178 = insertelement <4 x float> %tmp551, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6178, <4 x float>* undef, align 16 + store <4 x float> %vecins6178, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp552 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3487,13 +3487,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6184 = insertelement <4 x float> %tmp555, float %add6183, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6184, <4 x float>* undef, align 16 + store <4 x float> %vecins6184, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp556 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6189 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6189 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6189, <4 x float>* undef, align 16 + store <4 x float> %vecins6189, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp557 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3505,7 +3505,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6192 = insertelement <4 x float> %tmp558, float %add6191, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6192, <4 x float>* undef, align 16 + store <4 x float> %vecins6192, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp559 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3519,7 +3519,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6198 = insertelement <4 x float> %tmp561, float %add6197, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp562 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3527,7 +3527,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6200 = fadd <4 x float> %tmp563, %tmp562 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6200, <4 x float>* undef, align 16 + store <4 x float> %add6200, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp564 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3535,7 +3535,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp565 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6203 = insertelement <4 x float> %tmp565, float %val, i32 0 + %vecins6203 = insertelement <4 x float> %tmp565, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp566 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3549,9 +3549,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp568 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6209 = insertelement <4 x float> %tmp568, float %val, i32 2 + %vecins6209 = insertelement <4 x float> %tmp568, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6209, <4 x float>* undef, align 16 + store <4 x float> %vecins6209, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp569 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3559,7 +3559,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp570 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6219 = fadd float %val, 0xC0596CCCC0000000 + %add6219 = fadd float undef, 0xC0596CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp571 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3573,7 +3573,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6228 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6228, <4 x float>* undef, align 16 + store <4 x float> %add6228, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6229 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3583,7 +3583,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6231 = insertelement <4 x float> %tmp573, float %add6230, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6231, <4 x float>* undef, align 16 + store <4 x float> %vecins6231, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp574 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3595,7 +3595,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6234 = insertelement <4 x float> %tmp575, float %add6233, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6234, <4 x float>* undef, align 16 + store <4 x float> %vecins6234, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6235 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3603,13 +3603,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6237 = insertelement <4 x float> undef, float %add6236, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6237, <4 x float>* undef, align 16 + store <4 x float> %vecins6237, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp576 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6245 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6245 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6245, <4 x float>* undef, align 16 + store <4 x float> %vecins6245, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp577 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3619,17 +3619,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp578 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6251 = insertelement <4 x float> undef, float %val, i32 2 + %vecins6251 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp579 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6253 = fadd float %val, 0xC0692999A0000000 + %add6253 = fadd float undef, 0xC0692999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6254 = insertelement <4 x float> undef, float %add6253, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6254, <4 x float>* undef, align 16 + store <4 x float> %vecins6254, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp580 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3637,7 +3637,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6256 = fadd <4 x float> %tmp581, %tmp580 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6256, <4 x float>* undef, align 16 + store <4 x float> %add6256, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp582 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3649,7 +3649,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6259 = insertelement <4 x float> %tmp583, float %add6258, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6259, <4 x float>* undef, align 16 + store <4 x float> %vecins6259, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp584 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3661,7 +3661,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6262 = insertelement <4 x float> %tmp585, float %add6261, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6262, <4 x float>* undef, align 16 + store <4 x float> %vecins6262, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp586 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3669,9 +3669,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp587 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6265 = insertelement <4 x float> %tmp587, float %val, i32 2 + %vecins6265 = insertelement <4 x float> %tmp587, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6265, <4 x float>* undef, align 16 + store <4 x float> %vecins6265, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp588 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3683,9 +3683,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6268 = insertelement <4 x float> %tmp589, float %add6267, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6268, <4 x float>* undef, align 16 + store <4 x float> %vecins6268, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp590 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3693,7 +3693,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6270 = fadd <4 x float> %tmp591, %tmp590 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6270, <4 x float>* undef, align 16 + store <4 x float> %add6270, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp592 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3705,7 +3705,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6273 = insertelement <4 x float> %tmp593, float %add6272, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6273, <4 x float>* undef, align 16 + store <4 x float> %vecins6273, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp594 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3717,7 +3717,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6276 = insertelement <4 x float> %tmp595, float %add6275, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6276, <4 x float>* undef, align 16 + store <4 x float> %vecins6276, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp596 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3729,7 +3729,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6279 = insertelement <4 x float> %tmp597, float %add6278, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6279, <4 x float>* undef, align 16 + store <4 x float> %vecins6279, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp598 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3739,21 +3739,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6282 = insertelement <4 x float> undef, float %add6281, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6282, <4 x float>* undef, align 16 + store <4 x float> %vecins6282, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6284 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6285 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6289 = fadd float %val, 0xC0738999A0000000 + %add6289 = fadd float undef, 0xC0738999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp599 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6293 = insertelement <4 x float> %tmp599, float %val, i32 2 + %vecins6293 = insertelement <4 x float> %tmp599, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6293, <4 x float>* undef, align 16 + store <4 x float> %vecins6293, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp600 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3763,15 +3763,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6296 = insertelement <4 x float> undef, float %add6295, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6296, <4 x float>* undef, align 16 + store <4 x float> %vecins6296, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp601 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6298 = fadd <4 x float> undef, %tmp601 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6298, <4 x float>* undef, align 16 + store <4 x float> %add6298, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp602 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3783,7 +3783,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6301 = insertelement <4 x float> %tmp603, float %add6300, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6301, <4 x float>* undef, align 16 + store <4 x float> %vecins6301, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp604 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3795,7 +3795,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6304 = insertelement <4 x float> %tmp605, float %add6303, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6304, <4 x float>* undef, align 16 + store <4 x float> %vecins6304, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp606 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3805,7 +3805,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6307 = insertelement <4 x float> undef, float %add6306, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6307, <4 x float>* undef, align 16 + store <4 x float> %vecins6307, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp607 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3817,9 +3817,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6310 = insertelement <4 x float> %tmp608, float %add6309, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6310, <4 x float>* undef, align 16 + store <4 x float> %vecins6310, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp609 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3827,7 +3827,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6312 = fadd <4 x float> %tmp610, %tmp609 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6312, <4 x float>* undef, align 16 + store <4 x float> %add6312, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp611 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3849,13 +3849,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6657 = insertelement <4 x float> %tmp614, float %add6656, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6657, <4 x float>* undef, align 16 + store <4 x float> %vecins6657, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6660 = insertelement <4 x float> undef, float %val, i32 3 + %vecins6660 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6660, <4 x float>* undef, align 16 + store <4 x float> %vecins6660, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp615 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3867,7 +3867,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6665 = insertelement <4 x float> %tmp616, float %add6664, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp617 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3875,15 +3875,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp618 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp619 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6676 = fadd <4 x float> %tmp619, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6676, <4 x float>* undef, align 16 + store <4 x float> %add6676, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp620 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3901,7 +3901,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp622 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp623 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3913,7 +3913,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6685 = insertelement <4 x float> %tmp624, float %add6684, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6685, <4 x float>* undef, align 16 + store <4 x float> %vecins6685, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp625 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3925,15 +3925,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6688 = insertelement <4 x float> %tmp626, float %add6687, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6688, <4 x float>* undef, align 16 + store <4 x float> %vecins6688, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp627 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6690 = fadd <4 x float> undef, %tmp627 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6690, <4 x float>* undef, align 16 + store <4 x float> %add6690, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp628 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3945,7 +3945,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6693 = insertelement <4 x float> %tmp629, float %add6692, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6693, <4 x float>* undef, align 16 + store <4 x float> %vecins6693, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp630 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3957,7 +3957,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6696 = insertelement <4 x float> %tmp631, float %add6695, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6696, <4 x float>* undef, align 16 + store <4 x float> %vecins6696, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp632 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3969,7 +3969,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6699 = insertelement <4 x float> %tmp633, float %add6698, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6699, <4 x float>* undef, align 16 + store <4 x float> %vecins6699, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp634 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3981,17 +3981,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6702 = insertelement <4 x float> %tmp635, float %add6701, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6702, <4 x float>* undef, align 16 + store <4 x float> %vecins6702, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp636 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp637 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6707 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6707 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6707, <4 x float>* undef, align 16 + store <4 x float> %vecins6707, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp638 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3999,7 +3999,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp639 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp640 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4031,21 +4031,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp645 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6726 = fadd float %val, 0x4059B999A0000000 + %add6726 = fadd float undef, 0x4059B999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp646 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6727 = insertelement <4 x float> %tmp646, float %add6726, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6727, <4 x float>* undef, align 16 + store <4 x float> %vecins6727, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6728 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6729 = fadd float %vecext6728, 0xC073466660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp647 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4053,7 +4053,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6732 = fadd <4 x float> %tmp648, %tmp647 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6732, <4 x float>* undef, align 16 + store <4 x float> %add6732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp649 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4065,7 +4065,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6735 = insertelement <4 x float> %tmp650, float %add6734, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6735, <4 x float>* undef, align 16 + store <4 x float> %vecins6735, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp651 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4077,7 +4077,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6738 = insertelement <4 x float> %tmp652, float %add6737, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6738, <4 x float>* undef, align 16 + store <4 x float> %vecins6738, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp653 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4089,7 +4089,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6741 = insertelement <4 x float> %tmp654, float %add6740, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6741, <4 x float>* undef, align 16 + store <4 x float> %vecins6741, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp655 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4101,7 +4101,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6744 = insertelement <4 x float> %tmp656, float %add6743, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6744, <4 x float>* undef, align 16 + store <4 x float> %vecins6744, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp657 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4109,21 +4109,21 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6746 = fadd <4 x float> %tmp658, %tmp657 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6746, <4 x float>* undef, align 16 + store <4 x float> %add6746, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp659 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6749 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6749 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6749, <4 x float>* undef, align 16 + store <4 x float> %vecins6749, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp660 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6751 = fadd float %val, 0x4075DE6660000000 + %add6751 = fadd float undef, 0x4075DE6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6752 = insertelement <4 x float> undef, float %add6751, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6752, <4 x float>* undef, align 16 + store <4 x float> %vecins6752, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp661 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4133,7 +4133,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6755 = insertelement <4 x float> undef, float %add6754, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6755, <4 x float>* undef, align 16 + store <4 x float> %vecins6755, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp662 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4145,15 +4145,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6758 = insertelement <4 x float> %tmp663, float %add6757, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6758, <4 x float>* undef, align 16 + store <4 x float> %vecins6758, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp664 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6760 = fadd <4 x float> undef, %tmp664 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6760, <4 x float>* undef, align 16 + store <4 x float> %add6760, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp665 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4165,9 +4165,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp666 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp667 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4183,7 +4183,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6777 = insertelement <4 x float> %tmp669, float %add6776, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6777, <4 x float>* undef, align 16 + store <4 x float> %vecins6777, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp670 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4195,9 +4195,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6784 = extractelement <4 x float> %tmp671, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6875 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6875 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6875, <4 x float>* undef, align 16 + store <4 x float> %vecins6875, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp672 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4207,15 +4207,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6878 = insertelement <4 x float> undef, float %add6877, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6878, <4 x float>* undef, align 16 + store <4 x float> %vecins6878, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6888 = fadd float %val, 0x4057CCCCC0000000 + %add6888 = fadd float undef, 0x4057CCCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp673 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6889 = insertelement <4 x float> %tmp673, float %add6888, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6889, <4 x float>* undef, align 16 + store <4 x float> %vecins6889, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp674 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4227,7 +4227,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6892 = insertelement <4 x float> %tmp675, float %add6891, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6892, <4 x float>* undef, align 16 + store <4 x float> %vecins6892, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp676 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4239,7 +4239,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6895 = insertelement <4 x float> %tmp677, float %add6894, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6895, <4 x float>* undef, align 16 + store <4 x float> %vecins6895, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp678 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4249,7 +4249,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6900 = fadd <4 x float> %tmp680, %tmp679 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6900, <4 x float>* undef, align 16 + store <4 x float> %add6900, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp681 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4261,9 +4261,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6903 = insertelement <4 x float> %tmp682, float %add6902, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6903, <4 x float>* undef, align 16 + store <4 x float> %vecins6903, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6905 = fadd float %val, 0x4031B33340000000 + %add6905 = fadd float undef, 0x4031B33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp683 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4271,9 +4271,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp684 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6912 = insertelement <4 x float> %tmp684, float %val, i32 3 + %vecins6912 = insertelement <4 x float> %tmp684, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp685 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4281,13 +4281,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6914 = fadd <4 x float> %tmp686, %tmp685 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6914, <4 x float>* undef, align 16 + store <4 x float> %add6914, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6915 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6920 = insertelement <4 x float> undef, float %val, i32 1 + %vecins6920 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6920, <4 x float>* undef, align 16 + store <4 x float> %vecins6920, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6921 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4295,11 +4295,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp687 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6926 = insertelement <4 x float> %tmp687, float %val, i32 3 + %vecins6926 = insertelement <4 x float> %tmp687, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6926, <4 x float>* undef, align 16 + store <4 x float> %vecins6926, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp688 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4307,13 +4307,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6928 = fadd <4 x float> %tmp689, %tmp688 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6928, <4 x float>* undef, align 16 + store <4 x float> %add6928, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6930 = fadd float %val, -4.590000e+02 + %add6930 = fadd float undef, -4.590000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6931 = insertelement <4 x float> undef, float %add6930, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6931, <4 x float>* undef, align 16 + store <4 x float> %vecins6931, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp690 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4323,7 +4323,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp691 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp692 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4349,15 +4349,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp695 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6950 = fadd float %val, 0xC078F33340000000 + %add6950 = fadd float undef, 0xC078F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp696 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6951 = insertelement <4 x float> %tmp696, float %add6950, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6951, <4 x float>* undef, align 16 + store <4 x float> %vecins6951, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp697 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4369,7 +4369,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6954 = insertelement <4 x float> %tmp698, float %add6953, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6954, <4 x float>* undef, align 16 + store <4 x float> %vecins6954, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp699 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4377,7 +4377,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6956 = fadd <4 x float> %tmp700, %tmp699 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6956, <4 x float>* undef, align 16 + store <4 x float> %add6956, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp701 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4389,7 +4389,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6959 = insertelement <4 x float> %tmp702, float %add6958, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6959, <4 x float>* undef, align 16 + store <4 x float> %vecins6959, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp703 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4401,15 +4401,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6965 = insertelement <4 x float> %tmp704, float %add6964, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6965, <4 x float>* undef, align 16 + store <4 x float> %vecins6965, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6975 = fadd float %val, 0x406AF33340000000 + %add6975 = fadd float undef, 0x406AF33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp705 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6976 = insertelement <4 x float> %tmp705, float %add6975, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6976, <4 x float>* undef, align 16 + store <4 x float> %vecins6976, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp706 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4417,7 +4417,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6984 = fadd <4 x float> %tmp707, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6984, <4 x float>* undef, align 16 + store <4 x float> %add6984, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp708 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4429,7 +4429,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6987 = insertelement <4 x float> %tmp709, float %add6986, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6987, <4 x float>* undef, align 16 + store <4 x float> %vecins6987, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp710 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4439,11 +4439,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp711 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6996 = insertelement <4 x float> %tmp711, float %val, i32 3 + %vecins6996 = insertelement <4 x float> %tmp711, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6996, <4 x float>* undef, align 16 + store <4 x float> %vecins6996, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp712 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4451,7 +4451,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6998 = fadd <4 x float> %tmp713, %tmp712 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6998, <4 x float>* undef, align 16 + store <4 x float> %add6998, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp714 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4463,7 +4463,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7001 = insertelement <4 x float> %tmp715, float %add7000, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7001, <4 x float>* undef, align 16 + store <4 x float> %vecins7001, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp716 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4475,11 +4475,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7004 = insertelement <4 x float> %tmp717, float %add7003, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp718 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7140 = fadd float %val, 0x403D333340000000 + %add7140 = fadd float undef, 0x403D333340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7141 = insertelement <4 x float> undef, float %add7140, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4489,7 +4489,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7144 = insertelement <4 x float> undef, float %add7143, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp719 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4501,15 +4501,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7150 = insertelement <4 x float> %tmp720, float %add7149, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7150, <4 x float>* undef, align 16 + store <4 x float> %vecins7150, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp721 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7152 = fadd <4 x float> %tmp721, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7152, <4 x float>* undef, align 16 + store <4 x float> %add7152, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7156 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4519,7 +4519,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7158 = insertelement <4 x float> %tmp722, float %add7157, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7158, <4 x float>* undef, align 16 + store <4 x float> %vecins7158, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp723 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4531,13 +4531,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7161 = insertelement <4 x float> %tmp724, float %add7160, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7161, <4 x float>* undef, align 16 + store <4 x float> %vecins7161, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7168 = fadd float %val, 0xC072F199A0000000 + %add7168 = fadd float undef, 0xC072F199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp725 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7170 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4545,11 +4545,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7172 = insertelement <4 x float> undef, float %add7171, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7172, <4 x float>* undef, align 16 + store <4 x float> %vecins7172, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7173 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp726 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4559,7 +4559,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7421 = insertelement <4 x float> undef, float %add7420, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7421, <4 x float>* undef, align 16 + store <4 x float> %vecins7421, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp727 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4571,7 +4571,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7424 = insertelement <4 x float> %tmp728, float %add7423, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7424, <4 x float>* undef, align 16 + store <4 x float> %vecins7424, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp729 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4583,11 +4583,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7427 = insertelement <4 x float> %tmp730, float %add7426, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7427, <4 x float>* undef, align 16 + store <4 x float> %vecins7427, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7428 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp731 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4599,9 +4599,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7570 = insertelement <4 x float> %tmp732, float %add7569, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7570, <4 x float>* undef, align 16 + store <4 x float> %vecins7570, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp733 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4609,7 +4609,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7572 = fadd <4 x float> %tmp734, %tmp733 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7572, <4 x float>* undef, align 16 + store <4 x float> %add7572, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7573 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4619,11 +4619,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7575 = insertelement <4 x float> %tmp735, float %add7574, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7575, <4 x float>* undef, align 16 + store <4 x float> %vecins7575, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp736 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7577 = fadd float %val, 0xC051666660000000 + %add7577 = fadd float undef, 0xC051666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp737 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4635,7 +4635,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7581 = insertelement <4 x float> undef, float %add7580, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7581, <4 x float>* undef, align 16 + store <4 x float> %vecins7581, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp739 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4647,7 +4647,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7584 = insertelement <4 x float> %tmp740, float %add7583, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp741 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4655,7 +4655,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7586 = fadd <4 x float> %tmp742, %tmp741 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7586, <4 x float>* undef, align 16 + store <4 x float> %add7586, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp743 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4665,7 +4665,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp744 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp745 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4677,15 +4677,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7592 = insertelement <4 x float> %tmp746, float %add7591, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7592, <4 x float>* undef, align 16 + store <4 x float> %vecins7592, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp747 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7593 = extractelement <4 x float> %tmp747, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7595 = insertelement <4 x float> undef, float %val, i32 2 + %vecins7595 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7595, <4 x float>* undef, align 16 + store <4 x float> %vecins7595, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp748 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4693,17 +4693,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7597 = fadd float %vecext7596, 0x407E666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp749 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7616 = fadd float %val, 0xC04DE66660000000 + %add7616 = fadd float undef, 0xC04DE66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp750 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7617 = insertelement <4 x float> %tmp750, float %add7616, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7617, <4 x float>* undef, align 16 + store <4 x float> %vecins7617, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp751 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4715,17 +4715,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7620 = insertelement <4 x float> %tmp752, float %add7619, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7620, <4 x float>* undef, align 16 + store <4 x float> %vecins7620, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp753 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7622 = fadd float %val, 0xC054B999A0000000 + %add7622 = fadd float undef, 0xC054B999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp754 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7626 = insertelement <4 x float> undef, float %val, i32 3 + %vecins7626 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7626, <4 x float>* undef, align 16 + store <4 x float> %vecins7626, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp755 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4733,7 +4733,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7628 = fadd <4 x float> %tmp756, %tmp755 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7628, <4 x float>* undef, align 16 + store <4 x float> %add7628, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp757 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4745,13 +4745,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7631 = insertelement <4 x float> %tmp758, float %add7630, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7639 = fadd float %val, 0x407C5999A0000000 + %add7639 = fadd float undef, 0x407C5999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp759 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7640 = insertelement <4 x float> %tmp759, float %add7639, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp760 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4759,9 +4759,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp761 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7644 = fadd float %val, 0xC0758999A0000000 + %add7644 = fadd float undef, 0xC0758999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp762 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4773,7 +4773,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7648 = insertelement <4 x float> %tmp763, float %add7647, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7648, <4 x float>* undef, align 16 + store <4 x float> %vecins7648, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp764 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4785,7 +4785,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7651 = insertelement <4 x float> %tmp765, float %add7650, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7651, <4 x float>* undef, align 16 + store <4 x float> %vecins7651, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp766 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4797,7 +4797,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7654 = insertelement <4 x float> %tmp767, float %add7653, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7654, <4 x float>* undef, align 16 + store <4 x float> %vecins7654, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp768 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4805,7 +4805,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7656 = fadd <4 x float> %tmp769, %tmp768 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7656, <4 x float>* undef, align 16 + store <4 x float> %add7656, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp770 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4817,7 +4817,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7659 = insertelement <4 x float> %tmp771, float %add7658, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7659, <4 x float>* undef, align 16 + store <4 x float> %vecins7659, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp772 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4829,7 +4829,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7662 = insertelement <4 x float> %tmp773, float %add7661, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7662, <4 x float>* undef, align 16 + store <4 x float> %vecins7662, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp774 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4841,7 +4841,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7665 = insertelement <4 x float> %tmp775, float %add7664, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7665, <4 x float>* undef, align 16 + store <4 x float> %vecins7665, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp776 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4851,7 +4851,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7668 = insertelement <4 x float> undef, float %add7667, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7668, <4 x float>* undef, align 16 + store <4 x float> %vecins7668, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp777 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4873,23 +4873,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp781 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp782 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7731 = fadd float %val, 1.900000e+02 + %add7731 = fadd float undef, 1.900000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp783 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7732 = insertelement <4 x float> %tmp783, float %add7731, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7732, <4 x float>* undef, align 16 + store <4 x float> %vecins7732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp784 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7735 = insertelement <4 x float> %tmp784, float %val, i32 2 + %vecins7735 = insertelement <4 x float> %tmp784, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7735, <4 x float>* undef, align 16 + store <4 x float> %vecins7735, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp785 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4897,11 +4897,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7737 = fadd float %vecext7736, 0xC06AF66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7850 = insertelement <4 x float> undef, float %val, i32 3 + %vecins7850 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7850, <4 x float>* undef, align 16 + store <4 x float> %vecins7850, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp786 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4909,7 +4909,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7852 = fadd <4 x float> %tmp787, %tmp786 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7852, <4 x float>* undef, align 16 + store <4 x float> %add7852, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp788 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4921,13 +4921,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9398 = insertelement <4 x float> %tmp789, float %add9397, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9398, <4 x float>* undef, align 16 + store <4 x float> %vecins9398, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9399 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp790 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9401 = insertelement <4 x float> %tmp790, float %val, i32 2 + %vecins9401 = insertelement <4 x float> %tmp790, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp791 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4939,11 +4939,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9404 = insertelement <4 x float> %tmp792, float %add9403, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9404, <4 x float>* undef, align 16 + store <4 x float> %vecins9404, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp793 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp794 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4959,7 +4959,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp796 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp797 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4971,7 +4971,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9415 = insertelement <4 x float> %tmp798, float %add9414, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9415, <4 x float>* undef, align 16 + store <4 x float> %vecins9415, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp799 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4983,9 +4983,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9418 = insertelement <4 x float> %tmp800, float %add9417, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9418, <4 x float>* undef, align 16 + store <4 x float> %vecins9418, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp801 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4993,7 +4993,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9420 = fadd <4 x float> %tmp802, %tmp801 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9420, <4 x float>* undef, align 16 + store <4 x float> %add9420, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp803 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5001,9 +5001,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp804 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9423 = insertelement <4 x float> %tmp804, float %val, i32 0 + %vecins9423 = insertelement <4 x float> %tmp804, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9423, <4 x float>* undef, align 16 + store <4 x float> %vecins9423, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp805 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5015,17 +5015,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9426 = insertelement <4 x float> %tmp806, float %add9425, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9426, <4 x float>* undef, align 16 + store <4 x float> %vecins9426, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp807 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9428 = fadd float %val, 0xC065466660000000 + %add9428 = fadd float undef, 0xC065466660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp808 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9429 = insertelement <4 x float> %tmp808, float %add9428, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9429, <4 x float>* undef, align 16 + store <4 x float> %vecins9429, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp809 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5037,7 +5037,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9432 = insertelement <4 x float> %tmp810, float %add9431, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp811 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5045,7 +5045,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9434 = fadd <4 x float> %tmp812, %tmp811 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9436 = fadd float %val, -3.185000e+02 + %add9436 = fadd float undef, -3.185000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp813 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5053,7 +5053,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp814 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp815 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5065,7 +5065,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9443 = insertelement <4 x float> %tmp816, float %add9442, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9443, <4 x float>* undef, align 16 + store <4 x float> %vecins9443, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp817 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5077,7 +5077,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9446 = insertelement <4 x float> %tmp818, float %add9445, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9446, <4 x float>* undef, align 16 + store <4 x float> %vecins9446, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp819 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5085,23 +5085,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9448 = fadd <4 x float> %tmp820, %tmp819 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9448, <4 x float>* undef, align 16 + store <4 x float> %add9448, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9450 = fadd float %val, 0xC0718199A0000000 + %add9450 = fadd float undef, 0xC0718199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp821 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9451 = insertelement <4 x float> %tmp821, float %add9450, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9451, <4 x float>* undef, align 16 + store <4 x float> %vecins9451, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp822 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp823 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9454 = insertelement <4 x float> %tmp823, float %val, i32 1 + %vecins9454 = insertelement <4 x float> %tmp823, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9454, <4 x float>* undef, align 16 + store <4 x float> %vecins9454, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp824 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5113,23 +5113,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9457 = insertelement <4 x float> %tmp825, float %add9456, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9457, <4 x float>* undef, align 16 + store <4 x float> %vecins9457, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9458 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp826 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9460 = insertelement <4 x float> %tmp826, float %val, i32 3 + %vecins9460 = insertelement <4 x float> %tmp826, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9460, <4 x float>* undef, align 16 + store <4 x float> %vecins9460, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp827 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9462 = fadd <4 x float> %tmp827, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9462, <4 x float>* undef, align 16 + store <4 x float> %add9462, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp828 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5137,23 +5137,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp829 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9465 = insertelement <4 x float> %tmp829, float %val, i32 0 + %vecins9465 = insertelement <4 x float> %tmp829, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9467 = fadd float %val, 0x405D666660000000 + %add9467 = fadd float undef, 0x405D666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp830 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9468 = insertelement <4 x float> %tmp830, float %add9467, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9468, <4 x float>* undef, align 16 + store <4 x float> %vecins9468, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp831 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9470 = fadd float %val, 0x4077033340000000 + %add9470 = fadd float undef, 0x4077033340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp832 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9472 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5163,9 +5163,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9474 = insertelement <4 x float> %tmp833, float %add9473, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9474, <4 x float>* undef, align 16 + store <4 x float> %vecins9474, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp834 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5173,7 +5173,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9476 = fadd <4 x float> %tmp835, %tmp834 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9476, <4 x float>* undef, align 16 + store <4 x float> %add9476, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp836 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5185,17 +5185,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9479 = insertelement <4 x float> %tmp837, float %add9478, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9479, <4 x float>* undef, align 16 + store <4 x float> %vecins9479, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp838 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9481 = fadd float %val, 0x407BE33340000000 + %add9481 = fadd float undef, 0x407BE33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp839 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9482 = insertelement <4 x float> %tmp839, float %add9481, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9482, <4 x float>* undef, align 16 + store <4 x float> %vecins9482, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9483 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5205,7 +5205,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9485 = insertelement <4 x float> %tmp840, float %add9484, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9485, <4 x float>* undef, align 16 + store <4 x float> %vecins9485, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp841 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5215,13 +5215,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp842 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp843 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp844 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5229,15 +5229,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9492 = fadd float %vecext9491, 0x407C166660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9495 = fadd float %val, 0x407DBB3340000000 + %add9495 = fadd float undef, 0x407DBB3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp845 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9496 = insertelement <4 x float> %tmp845, float %add9495, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9496, <4 x float>* undef, align 16 + store <4 x float> %vecins9496, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp846 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5249,41 +5249,41 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9499 = insertelement <4 x float> %tmp847, float %add9498, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9499, <4 x float>* undef, align 16 + store <4 x float> %vecins9499, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp848 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9501 = fadd float %val, 0x407D5CCCC0000000 + %add9501 = fadd float undef, 0x407D5CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp849 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9502 = insertelement <4 x float> %tmp849, float %add9501, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9502, <4 x float>* undef, align 16 + store <4 x float> %vecins9502, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp850 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9504 = fadd <4 x float> %tmp850, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9504, <4 x float>* undef, align 16 + store <4 x float> %add9504, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp851 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9506 = fadd float %val, 0x4076EE6660000000 + %add9506 = fadd float undef, 0x4076EE6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp852 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9507 = insertelement <4 x float> %tmp852, float %add9506, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9507, <4 x float>* undef, align 16 + store <4 x float> %vecins9507, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp853 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9509 = fadd float %val, 0xC0535999A0000000 + %add9509 = fadd float undef, 0xC0535999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp854 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp855 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5295,7 +5295,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9513 = insertelement <4 x float> %tmp856, float %add9512, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9513, <4 x float>* undef, align 16 + store <4 x float> %vecins9513, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp857 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5303,11 +5303,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp858 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9516 = insertelement <4 x float> %tmp858, float %val, i32 3 + %vecins9516 = insertelement <4 x float> %tmp858, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9516, <4 x float>* undef, align 16 + store <4 x float> %vecins9516, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp859 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5319,9 +5319,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp862 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9521 = insertelement <4 x float> %tmp862, float %val, i32 0 + %vecins9521 = insertelement <4 x float> %tmp862, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9521, <4 x float>* undef, align 16 + store <4 x float> %vecins9521, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp863 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5333,25 +5333,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9524 = insertelement <4 x float> %tmp864, float %add9523, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9524, <4 x float>* undef, align 16 + store <4 x float> %vecins9524, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp865 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9526 = fadd float %val, 0x4072833340000000 + %add9526 = fadd float undef, 0x4072833340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp866 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9527 = insertelement <4 x float> %tmp866, float %add9526, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9527, <4 x float>* undef, align 16 + store <4 x float> %vecins9527, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp867 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9530 = insertelement <4 x float> undef, float %val, i32 3 + %vecins9530 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9530, <4 x float>* undef, align 16 + store <4 x float> %vecins9530, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp868 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5363,9 +5363,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp870 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9535 = insertelement <4 x float> %tmp870, float %val, i32 0 + %vecins9535 = insertelement <4 x float> %tmp870, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9535, <4 x float>* undef, align 16 + store <4 x float> %vecins9535, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp871 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5377,7 +5377,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9538 = insertelement <4 x float> %tmp872, float %add9537, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9538, <4 x float>* undef, align 16 + store <4 x float> %vecins9538, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp873 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5385,17 +5385,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9543 = fadd float %vecext9542, 0x4050D999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9576 = fadd float %val, 0x40219999A0000000 + %add9576 = fadd float undef, 0x40219999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9577 = insertelement <4 x float> undef, float %add9576, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9577, <4 x float>* undef, align 16 + store <4 x float> %vecins9577, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp874 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9580 = insertelement <4 x float> undef, float %val, i32 1 + %vecins9580 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9580, <4 x float>* undef, align 16 + store <4 x float> %vecins9580, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp875 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5407,11 +5407,11 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9583 = insertelement <4 x float> %tmp876, float %add9582, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9583, <4 x float>* undef, align 16 + store <4 x float> %vecins9583, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp877 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9673 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5421,7 +5421,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9675 = insertelement <4 x float> %tmp878, float %add9674, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9675, <4 x float>* undef, align 16 + store <4 x float> %vecins9675, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9676 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5441,7 +5441,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9681 = insertelement <4 x float> %tmp881, float %add9680, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9681, <4 x float>* undef, align 16 + store <4 x float> %vecins9681, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp882 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5451,7 +5451,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9686 = fadd <4 x float> %tmp883, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9686, <4 x float>* undef, align 16 + store <4 x float> %add9686, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp884 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5481,19 +5481,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9695 = insertelement <4 x float> %tmp888, float %add9694, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9695, <4 x float>* undef, align 16 + store <4 x float> %vecins9695, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp889 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9697 = fadd float %val, 0x4058D33340000000 + %add9697 = fadd float undef, 0x4058D33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp890 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9698 = insertelement <4 x float> %tmp890, float %add9697, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9698, <4 x float>* undef, align 16 + store <4 x float> %vecins9698, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp891 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5509,7 +5509,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9703 = insertelement <4 x float> %tmp893, float %add9702, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9703, <4 x float>* undef, align 16 + store <4 x float> %vecins9703, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp894 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5521,7 +5521,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9706 = insertelement <4 x float> %tmp895, float %add9705, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9706, <4 x float>* undef, align 16 + store <4 x float> %vecins9706, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9707 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5531,23 +5531,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9709 = insertelement <4 x float> %tmp896, float %add9708, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9709, <4 x float>* undef, align 16 + store <4 x float> %vecins9709, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp897 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9710 = extractelement <4 x float> %tmp897, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9712 = insertelement <4 x float> undef, float %val, i32 3 + %vecins9712 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9712, <4 x float>* undef, align 16 + store <4 x float> %vecins9712, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp898 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9714 = fadd <4 x float> undef, %tmp898 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9714, <4 x float>* undef, align 16 + store <4 x float> %add9714, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp899 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5555,9 +5555,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp900 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9717 = insertelement <4 x float> %tmp900, float %val, i32 0 + %vecins9717 = insertelement <4 x float> %tmp900, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9717, <4 x float>* undef, align 16 + store <4 x float> %vecins9717, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp901 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5569,7 +5569,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9720 = insertelement <4 x float> %tmp902, float %add9719, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9720, <4 x float>* undef, align 16 + store <4 x float> %vecins9720, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp903 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5581,7 +5581,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9723 = insertelement <4 x float> %tmp904, float %add9722, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9723, <4 x float>* undef, align 16 + store <4 x float> %vecins9723, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp905 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5593,15 +5593,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9726 = insertelement <4 x float> %tmp906, float %add9725, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9726, <4 x float>* undef, align 16 + store <4 x float> %vecins9726, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp907 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9728 = fadd <4 x float> %tmp907, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9728, <4 x float>* undef, align 16 + store <4 x float> %add9728, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp908 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5613,17 +5613,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9731 = insertelement <4 x float> %tmp909, float %add9730, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9731, <4 x float>* undef, align 16 + store <4 x float> %vecins9731, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp910 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9733 = fadd float %val, 0xC050F33340000000 + %add9733 = fadd float undef, 0xC050F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp911 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9734 = insertelement <4 x float> %tmp911, float %add9733, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9734, <4 x float>* undef, align 16 + store <4 x float> %vecins9734, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp912 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5635,23 +5635,23 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9737 = insertelement <4 x float> %tmp913, float %add9736, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9737, <4 x float>* undef, align 16 + store <4 x float> %vecins9737, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp914 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9738 = extractelement <4 x float> %tmp914, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9740 = insertelement <4 x float> undef, float %val, i32 3 + %vecins9740 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9740, <4 x float>* undef, align 16 + store <4 x float> %vecins9740, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp915 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp916 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp917 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5661,7 +5661,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9745 = insertelement <4 x float> undef, float %add9744, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9745, <4 x float>* undef, align 16 + store <4 x float> %vecins9745, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp918 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5673,7 +5673,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9748 = insertelement <4 x float> %tmp919, float %add9747, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9748, <4 x float>* undef, align 16 + store <4 x float> %vecins9748, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp920 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5685,7 +5685,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9751 = insertelement <4 x float> %tmp921, float %add9750, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9751, <4 x float>* undef, align 16 + store <4 x float> %vecins9751, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp922 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5697,9 +5697,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9754 = insertelement <4 x float> %tmp923, float %add9753, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9754, <4 x float>* undef, align 16 + store <4 x float> %vecins9754, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral9755 + store <4 x float> , <4 x float>* %.compoundliteral9755 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp924 = load <4 x float>, <4 x float>* %.compoundliteral9755 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5717,7 +5717,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9759 = insertelement <4 x float> %tmp927, float %add9758, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9759, <4 x float>* undef, align 16 + store <4 x float> %vecins9759, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp928 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5729,17 +5729,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9762 = insertelement <4 x float> %tmp929, float %add9761, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9762, <4 x float>* undef, align 16 + store <4 x float> %vecins9762, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp930 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9764 = fadd float %val, 0xC060E66660000000 + %add9764 = fadd float undef, 0xC060E66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp931 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9765 = insertelement <4 x float> %tmp931, float %add9764, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9765, <4 x float>* undef, align 16 + store <4 x float> %vecins9765, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp932 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5751,9 +5751,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9768 = insertelement <4 x float> %tmp933, float %add9767, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9768, <4 x float>* undef, align 16 + store <4 x float> %vecins9768, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral9769 + store <4 x float> , <4 x float>* %.compoundliteral9769 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp934 = load <4 x float>, <4 x float>* %.compoundliteral9769 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5761,7 +5761,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9770 = fadd <4 x float> %tmp935, %tmp934 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9770, <4 x float>* undef, align 16 + store <4 x float> %add9770, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp936 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5773,7 +5773,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9773 = insertelement <4 x float> %tmp937, float %add9772, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9773, <4 x float>* undef, align 16 + store <4 x float> %vecins9773, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp938 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5785,25 +5785,25 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9776 = insertelement <4 x float> %tmp939, float %add9775, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9776, <4 x float>* undef, align 16 + store <4 x float> %vecins9776, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9816 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp940 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9818 = insertelement <4 x float> %tmp940, float %val, i32 1 + %vecins9818 = insertelement <4 x float> %tmp940, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp941 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10388 = fadd float %val, 4.755000e+02 + %add10388 = fadd float undef, 4.755000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp942 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10389 = insertelement <4 x float> %tmp942, float %add10388, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10389, <4 x float>* undef, align 16 + store <4 x float> %vecins10389, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp943 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5815,19 +5815,19 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10392 = insertelement <4 x float> %tmp944, float %add10391, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10392, <4 x float>* undef, align 16 + store <4 x float> %vecins10392, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp945 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp946 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10405 = fadd float %val, -5.650000e+01 + %add10405 = fadd float undef, -5.650000e+01 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp947 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10406 = insertelement <4 x float> %tmp947, float %add10405, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10406, <4 x float>* undef, align 16 + store <4 x float> %vecins10406, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp948 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5839,7 +5839,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10409 = insertelement <4 x float> %tmp949, float %add10408, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10409, <4 x float>* undef, align 16 + store <4 x float> %vecins10409, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp950 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5849,9 +5849,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp951 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral10413 + store <4 x float> , <4 x float>* %.compoundliteral10413 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp952 = load <4 x float>, <4 x float>* %.compoundliteral10413 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5859,7 +5859,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add10414 = fadd <4 x float> %tmp953, %tmp952 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add10414, <4 x float>* undef, align 16 + store <4 x float> %add10414, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp954 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5871,7 +5871,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10417 = insertelement <4 x float> %tmp955, float %add10416, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10417, <4 x float>* undef, align 16 + store <4 x float> %vecins10417, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp956 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5883,15 +5883,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10420 = insertelement <4 x float> %tmp957, float %add10419, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10420, <4 x float>* undef, align 16 + store <4 x float> %vecins10420, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10422 = fadd float %val, 0xC0662CCCC0000000 + %add10422 = fadd float undef, 0xC0662CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext10424 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp958 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5899,7 +5899,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add10428 = fadd <4 x float> %tmp959, %tmp958 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add10428, <4 x float>* undef, align 16 + store <4 x float> %add10428, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp960 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5909,13 +5909,13 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp961 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10436 = fadd float %val, 0xC06AF33340000000 + %add10436 = fadd float undef, 0xC06AF33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp962 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10437 = insertelement <4 x float> %tmp962, float %add10436, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10437, <4 x float>* undef, align 16 + store <4 x float> %vecins10437, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext10438 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5925,9 +5925,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10440 = insertelement <4 x float> %tmp963, float %add10439, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10440, <4 x float>* undef, align 16 + store <4 x float> %vecins10440, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp964 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5941,7 +5941,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10445 = insertelement <4 x float> %tmp966, float %add10444, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10445, <4 x float>* undef, align 16 + store <4 x float> %vecins10445, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp967 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5953,7 +5953,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10448 = insertelement <4 x float> %tmp968, float %add10447, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10448, <4 x float>* undef, align 16 + store <4 x float> %vecins10448, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp969 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5965,7 +5965,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10451 = insertelement <4 x float> %tmp970, float %add10450, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10451, <4 x float>* undef, align 16 + store <4 x float> %vecins10451, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp971 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5975,7 +5975,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10454 = insertelement <4 x float> undef, float %add10453, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp972 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5983,7 +5983,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add10456 = fadd <4 x float> %tmp973, %tmp972 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add10456, <4 x float>* undef, align 16 + store <4 x float> %add10456, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp974 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5993,7 +5993,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10459 = insertelement <4 x float> undef, float %add10458, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10459, <4 x float>* undef, align 16 + store <4 x float> %vecins10459, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp975 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6015,7 +6015,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10465 = insertelement <4 x float> %tmp978, float %add10464, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10465, <4 x float>* undef, align 16 + store <4 x float> %vecins10465, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp979 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6027,9 +6027,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10468 = insertelement <4 x float> %tmp980, float %add10467, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10468, <4 x float>* undef, align 16 + store <4 x float> %vecins10468, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp981 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6045,7 +6045,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10473 = insertelement <4 x float> %tmp983, float %add10472, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10473, <4 x float>* undef, align 16 + store <4 x float> %vecins10473, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp984 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6057,15 +6057,15 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10476 = insertelement <4 x float> %tmp985, float %add10475, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10476, <4 x float>* undef, align 16 + store <4 x float> %vecins10476, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10489 = fadd float %val, 0x4074666660000000 + %add10489 = fadd float undef, 0x4074666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp986 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10490 = insertelement <4 x float> %tmp986, float %add10489, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10490, <4 x float>* undef, align 16 + store <4 x float> %vecins10490, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp987 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6079,9 +6079,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10510 = insertelement <4 x float> %tmp989, float %add10509, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10510, <4 x float>* undef, align 16 + store <4 x float> %vecins10510, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp990 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6097,17 +6097,17 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10515 = insertelement <4 x float> %tmp992, float %add10514, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10515, <4 x float>* undef, align 16 + store <4 x float> %vecins10515, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp993 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10562 = fadd float %val, 2.035000e+02 + %add10562 = fadd float undef, 2.035000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp994 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10563 = insertelement <4 x float> %tmp994, float %add10562, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10563, <4 x float>* undef, align 16 + store <4 x float> %vecins10563, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp995 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6119,9 +6119,9 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10566 = insertelement <4 x float> %tmp996, float %add10565, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10566, <4 x float>* undef, align 16 + store <4 x float> %vecins10566, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral10567 + store <4 x float> , <4 x float>* %.compoundliteral10567 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp997 = load <4 x float>, <4 x float>* %.compoundliteral10567 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6139,7 +6139,7 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10571 = insertelement <4 x float> %tmp1000, float %add10570, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10571, <4 x float>* undef, align 16 + store <4 x float> %vecins10571, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp1001 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6151,56 +6151,56 @@ entry: tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10574 = insertelement <4 x float> %tmp1002, float %add10573, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10574, <4 x float>* undef, align 16 + store <4 x float> %vecins10574, <4 x float>* undef, align 16 %tmp1003 = load <4 x float>, <4 x float>* undef, align 16 %vecext10575 = extractelement <4 x float> %tmp1003, i32 2 %tmp1004 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10577 = insertelement <4 x float> %tmp1004, float %val, i32 2 - store volatile <4 x float> %vecins10577, <4 x float>* undef, align 16 + %vecins10577 = insertelement <4 x float> %tmp1004, float undef, i32 2 + store <4 x float> %vecins10577, <4 x float>* undef, align 16 %tmp1005 = load <4 x float>, <4 x float>* undef, align 16 %vecext10578 = extractelement <4 x float> %tmp1005, i32 3 %add10579 = fadd float %vecext10578, 0x4076566660000000 %tmp1006 = load <4 x float>, <4 x float>* undef, align 16 %vecins10580 = insertelement <4 x float> %tmp1006, float %add10579, i32 3 - store volatile <4 x float> %vecins10580, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10581 + store <4 x float> %vecins10580, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10581 %tmp1007 = load <4 x float>, <4 x float>* %.compoundliteral10581 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1008 = load <4 x float>, <4 x float>* undef, align 16 %vecext10583 = extractelement <4 x float> %tmp1008, i32 0 %add10584 = fadd float %vecext10583, 0xC060533340000000 %tmp1009 = load <4 x float>, <4 x float>* undef, align 16 %vecins10585 = insertelement <4 x float> %tmp1009, float %add10584, i32 0 - store volatile <4 x float> %vecins10585, <4 x float>* undef, align 16 + store <4 x float> %vecins10585, <4 x float>* undef, align 16 %tmp1010 = load <4 x float>, <4 x float>* undef, align 16 %vecext10586 = extractelement <4 x float> %tmp1010, i32 1 %add10587 = fadd float %vecext10586, 0xC0694CCCC0000000 %tmp1011 = load <4 x float>, <4 x float>* undef, align 16 %vecins10588 = insertelement <4 x float> %tmp1011, float %add10587, i32 1 - store volatile <4 x float> %vecins10588, <4 x float>* undef, align 16 + store <4 x float> %vecins10588, <4 x float>* undef, align 16 %tmp1012 = load <4 x float>, <4 x float>* undef, align 16 %vecext10589 = extractelement <4 x float> %tmp1012, i32 2 %add10590 = fadd float %vecext10589, 0xC0541999A0000000 %tmp1013 = load <4 x float>, <4 x float>* undef, align 16 %vecins10591 = insertelement <4 x float> %tmp1013, float %add10590, i32 2 - store volatile <4 x float> %vecins10591, <4 x float>* undef, align 16 + store <4 x float> %vecins10591, <4 x float>* undef, align 16 %tmp1014 = load <4 x float>, <4 x float>* undef, align 16 %vecext10592 = extractelement <4 x float> %tmp1014, i32 3 %add10593 = fadd float %vecext10592, 0xC06C566660000000 %tmp1015 = load <4 x float>, <4 x float>* undef, align 16 %vecins10594 = insertelement <4 x float> %tmp1015, float %add10593, i32 3 - store volatile <4 x float> %vecins10594, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10595 + store <4 x float> %vecins10594, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10595 %tmp1016 = load <4 x float>, <4 x float>* %.compoundliteral10595 %tmp1017 = load <4 x float>, <4 x float>* undef, align 16 %add10596 = fadd <4 x float> %tmp1017, %tmp1016 - store volatile <4 x float> %add10596, <4 x float>* undef, align 16 + store <4 x float> %add10596, <4 x float>* undef, align 16 %tmp1018 = load <4 x float>, <4 x float>* undef, align 16 %vecext10597 = extractelement <4 x float> %tmp1018, i32 0 %add10598 = fadd float %vecext10597, 0x40640999A0000000 %tmp1019 = load <4 x float>, <4 x float>* undef, align 16 %vecins10599 = insertelement <4 x float> %tmp1019, float %add10598, i32 0 - store volatile <4 x float> %vecins10599, <4 x float>* undef, align 16 + store <4 x float> %vecins10599, <4 x float>* undef, align 16 %tmp1020 = load <4 x float>, <4 x float>* undef, align 16 %vecext10600 = extractelement <4 x float> %tmp1020, i32 1 %add10601 = fadd float %vecext10600, 0xC073966660000000 @@ -6211,48 +6211,48 @@ entry: %add10604 = fadd float %vecext10603, 1.780000e+02 %tmp1023 = load <4 x float>, <4 x float>* undef, align 16 %vecins10605 = insertelement <4 x float> %tmp1023, float %add10604, i32 2 - store volatile <4 x float> %vecins10605, <4 x float>* undef, align 16 + store <4 x float> %vecins10605, <4 x float>* undef, align 16 %tmp1024 = load <4 x float>, <4 x float>* undef, align 16 - %add10607 = fadd float %val, 0x4070A33340000000 + %add10607 = fadd float undef, 0x4070A33340000000 %tmp1025 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10609 + store <4 x float> , <4 x float>* %.compoundliteral10609 %tmp1026 = load <4 x float>, <4 x float>* %.compoundliteral10609 %tmp1027 = load <4 x float>, <4 x float>* undef, align 16 %tmp1028 = load <4 x float>, <4 x float>* undef, align 16 %vecext10611 = extractelement <4 x float> %tmp1028, i32 0 %add10612 = fadd float %vecext10611, 0x40757199A0000000 %vecins10613 = insertelement <4 x float> undef, float %add10612, i32 0 - store volatile <4 x float> %vecins10613, <4 x float>* undef, align 16 + store <4 x float> %vecins10613, <4 x float>* undef, align 16 %tmp1029 = load <4 x float>, <4 x float>* undef, align 16 %vecext10614 = extractelement <4 x float> %tmp1029, i32 1 %add10615 = fadd float %vecext10614, 0x40740CCCC0000000 %tmp1030 = load <4 x float>, <4 x float>* undef, align 16 %vecins10616 = insertelement <4 x float> %tmp1030, float %add10615, i32 1 - store volatile <4 x float> %vecins10616, <4 x float>* undef, align 16 + store <4 x float> %vecins10616, <4 x float>* undef, align 16 %tmp1031 = load <4 x float>, <4 x float>* undef, align 16 %vecext10617 = extractelement <4 x float> %tmp1031, i32 2 %add10618 = fadd float %vecext10617, 0xC012CCCCC0000000 %tmp1032 = load <4 x float>, <4 x float>* undef, align 16 %vecins10619 = insertelement <4 x float> %tmp1032, float %add10618, i32 2 - store volatile <4 x float> %vecins10619, <4 x float>* undef, align 16 + store <4 x float> %vecins10619, <4 x float>* undef, align 16 %tmp1033 = load <4 x float>, <4 x float>* undef, align 16 %vecext10620 = extractelement <4 x float> %tmp1033, i32 3 %add10621 = fadd float %vecext10620, 0x406E566660000000 %tmp1034 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10623 + store <4 x float> , <4 x float>* %.compoundliteral10623 %tmp1035 = load <4 x float>, <4 x float>* %.compoundliteral10623 %add10624 = fadd <4 x float> undef, %tmp1035 %tmp1036 = load <4 x float>, <4 x float>* undef, align 16 %vecext10625 = extractelement <4 x float> %tmp1036, i32 0 %tmp1037 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10627 = insertelement <4 x float> %tmp1037, float %val, i32 0 - store volatile <4 x float> %vecins10627, <4 x float>* undef, align 16 + %vecins10627 = insertelement <4 x float> %tmp1037, float undef, i32 0 + store <4 x float> %vecins10627, <4 x float>* undef, align 16 %tmp1038 = load <4 x float>, <4 x float>* undef, align 16 %vecext10628 = extractelement <4 x float> %tmp1038, i32 1 %add10629 = fadd float %vecext10628, 0x407E3CCCC0000000 %tmp1039 = load <4 x float>, <4 x float>* undef, align 16 %vecins10630 = insertelement <4 x float> %tmp1039, float %add10629, i32 1 - store volatile <4 x float> %vecins10630, <4 x float>* undef, align 16 + store <4 x float> %vecins10630, <4 x float>* undef, align 16 %tmp1040 = load <4 x float>, <4 x float>* undef, align 16 %vecext10631 = extractelement <4 x float> %tmp1040, i32 2 %tmp1041 = load <4 x float>, <4 x float>* undef, align 16 @@ -6261,8 +6261,8 @@ entry: %add10635 = fadd float %vecext10634, 0xC067533340000000 %tmp1043 = load <4 x float>, <4 x float>* undef, align 16 %vecins10636 = insertelement <4 x float> %tmp1043, float %add10635, i32 3 - store volatile <4 x float> %vecins10636, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10637 + store <4 x float> %vecins10636, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10637 %tmp1044 = load <4 x float>, <4 x float>* undef, align 16 %add10638 = fadd <4 x float> %tmp1044, undef %tmp1045 = load <4 x float>, <4 x float>* undef, align 16 @@ -6270,94 +6270,94 @@ entry: %add10640 = fadd float %vecext10639, 0x406CA33340000000 %tmp1046 = load <4 x float>, <4 x float>* undef, align 16 %vecins10641 = insertelement <4 x float> %tmp1046, float %add10640, i32 0 - store volatile <4 x float> %vecins10641, <4 x float>* undef, align 16 + store <4 x float> %vecins10641, <4 x float>* undef, align 16 %tmp1047 = load <4 x float>, <4 x float>* undef, align 16 %vecext10642 = extractelement <4 x float> %tmp1047, i32 1 %add10643 = fadd float %vecext10642, 0xC07C8999A0000000 %tmp1048 = load <4 x float>, <4 x float>* undef, align 16 %vecins10644 = insertelement <4 x float> %tmp1048, float %add10643, i32 1 - store volatile <4 x float> %vecins10644, <4 x float>* undef, align 16 + store <4 x float> %vecins10644, <4 x float>* undef, align 16 %tmp1049 = load <4 x float>, <4 x float>* undef, align 16 %vecext10645 = extractelement <4 x float> %tmp1049, i32 2 %tmp1050 = load <4 x float>, <4 x float>* undef, align 16 %tmp1051 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10748 = insertelement <4 x float> undef, float %val, i32 3 + %vecins10748 = insertelement <4 x float> undef, float undef, i32 3 %tmp1052 = load <4 x float>, <4 x float>* %.compoundliteral10749 %add10750 = fadd <4 x float> undef, %tmp1052 - store volatile <4 x float> %add10750, <4 x float>* undef, align 16 + store <4 x float> %add10750, <4 x float>* undef, align 16 %tmp1053 = load <4 x float>, <4 x float>* undef, align 16 %vecext10751 = extractelement <4 x float> %tmp1053, i32 0 %add10752 = fadd float %vecext10751, 0x4071B33340000000 %tmp1054 = load <4 x float>, <4 x float>* undef, align 16 %vecins10753 = insertelement <4 x float> %tmp1054, float %add10752, i32 0 - store volatile <4 x float> %vecins10753, <4 x float>* undef, align 16 + store <4 x float> %vecins10753, <4 x float>* undef, align 16 %tmp1055 = load <4 x float>, <4 x float>* undef, align 16 %vecext10754 = extractelement <4 x float> %tmp1055, i32 1 %add10755 = fadd float %vecext10754, 0xC076A66660000000 %tmp1056 = load <4 x float>, <4 x float>* undef, align 16 %vecins10756 = insertelement <4 x float> %tmp1056, float %add10755, i32 1 - store volatile <4 x float> %vecins10756, <4 x float>* undef, align 16 + store <4 x float> %vecins10756, <4 x float>* undef, align 16 %tmp1057 = load <4 x float>, <4 x float>* undef, align 16 %vecext10757 = extractelement <4 x float> %tmp1057, i32 2 %add10758 = fadd float %vecext10757, 3.800000e+01 %tmp1058 = load <4 x float>, <4 x float>* undef, align 16 %vecins10759 = insertelement <4 x float> %tmp1058, float %add10758, i32 2 - store volatile <4 x float> %vecins10759, <4 x float>* undef, align 16 + store <4 x float> %vecins10759, <4 x float>* undef, align 16 %tmp1059 = load <4 x float>, <4 x float>* undef, align 16 %vecext10760 = extractelement <4 x float> %tmp1059, i32 3 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10763 + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10763 %tmp1060 = load <4 x float>, <4 x float>* %.compoundliteral10763 %tmp1061 = load <4 x float>, <4 x float>* undef, align 16 %tmp1062 = load <4 x float>, <4 x float>* undef, align 16 - %add10985 = fadd float %val, 0x405E933340000000 + %add10985 = fadd float undef, 0x405E933340000000 %tmp1063 = load <4 x float>, <4 x float>* undef, align 16 %vecins10986 = insertelement <4 x float> %tmp1063, float %add10985, i32 3 - store volatile <4 x float> %vecins10986, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10987 + store <4 x float> %vecins10986, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10987 %tmp1064 = load <4 x float>, <4 x float>* %.compoundliteral10987 %tmp1065 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10994 = insertelement <4 x float> %tmp1065, float %val, i32 1 + %vecins10994 = insertelement <4 x float> %tmp1065, float undef, i32 1 %tmp1066 = load <4 x float>, <4 x float>* undef, align 16 %vecext10995 = extractelement <4 x float> %tmp1066, i32 2 %add10996 = fadd float %vecext10995, 0x406F9999A0000000 %tmp1067 = load <4 x float>, <4 x float>* undef, align 16 %vecins10997 = insertelement <4 x float> %tmp1067, float %add10996, i32 2 - store volatile <4 x float> %vecins10997, <4 x float>* undef, align 16 + store <4 x float> %vecins10997, <4 x float>* undef, align 16 %tmp1068 = load <4 x float>, <4 x float>* undef, align 16 %vecext10998 = extractelement <4 x float> %tmp1068, i32 3 %add10999 = fadd float %vecext10998, -2.765000e+02 %tmp1069 = load <4 x float>, <4 x float>* undef, align 16 %vecins11000 = insertelement <4 x float> %tmp1069, float %add10999, i32 3 - store volatile <4 x float> %vecins11000, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral11001 + store <4 x float> %vecins11000, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral11001 %tmp1070 = load <4 x float>, <4 x float>* undef, align 16 %add11002 = fadd <4 x float> %tmp1070, undef %vecext11003 = extractelement <4 x float> undef, i32 0 %vecext11009 = extractelement <4 x float> undef, i32 2 %tmp1071 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11033 = insertelement <4 x float> %tmp1071, float %val, i32 0 - store volatile <4 x float> %vecins11033, <4 x float>* undef, align 16 + %vecins11033 = insertelement <4 x float> %tmp1071, float undef, i32 0 + store <4 x float> %vecins11033, <4 x float>* undef, align 16 %tmp1072 = load <4 x float>, <4 x float>* undef, align 16 %vecext11034 = extractelement <4 x float> %tmp1072, i32 1 %add11035 = fadd float %vecext11034, 0x4056D33340000000 %tmp1073 = load <4 x float>, <4 x float>* undef, align 16 %vecins11036 = insertelement <4 x float> %tmp1073, float %add11035, i32 1 - store volatile <4 x float> %vecins11036, <4 x float>* undef, align 16 + store <4 x float> %vecins11036, <4 x float>* undef, align 16 %tmp1074 = load <4 x float>, <4 x float>* undef, align 16 %vecext11037 = extractelement <4 x float> %tmp1074, i32 2 %add11038 = fadd float %vecext11037, 0xC06EA33340000000 %tmp1075 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1076 = load <4 x float>, <4 x float>* undef, align 16 %vecext11040 = extractelement <4 x float> %tmp1076, i32 3 %add11041 = fadd float %vecext11040, 0x40746CCCC0000000 %tmp1077 = load <4 x float>, <4 x float>* undef, align 16 %vecins11042 = insertelement <4 x float> %tmp1077, float %add11041, i32 3 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef %tmp1078 = load <4 x float>, <4 x float>* undef, align 16 %add11044 = fadd <4 x float> %tmp1078, undef - store volatile <4 x float> %add11044, <4 x float>* undef, align 16 + store <4 x float> %add11044, <4 x float>* undef, align 16 %tmp1079 = load <4 x float>, <4 x float>* undef, align 16 %vecext11045 = extractelement <4 x float> %tmp1079, i32 0 %add11046 = fadd float %vecext11045, 0xC076E66660000000 @@ -6366,58 +6366,58 @@ entry: %tmp1081 = load <4 x float>, <4 x float>* undef, align 16 %vecext11048 = extractelement <4 x float> %tmp1081, i32 1 %add11049 = fadd float %vecext11048, 4.100000e+02 - %vecins11064 = insertelement <4 x float> undef, float %val, i32 1 - %add11074 = fadd float %val, 0xC06FF999A0000000 + %vecins11064 = insertelement <4 x float> undef, float undef, i32 1 + %add11074 = fadd float undef, 0xC06FF999A0000000 %tmp1082 = load <4 x float>, <4 x float>* undef, align 16 %vecins11075 = insertelement <4 x float> %tmp1082, float %add11074, i32 0 - store volatile <4 x float> %vecins11075, <4 x float>* undef, align 16 - %add11077 = fadd float %val, 0xC075D33340000000 + store <4 x float> %vecins11075, <4 x float>* undef, align 16 + %add11077 = fadd float undef, 0xC075D33340000000 %tmp1083 = load <4 x float>, <4 x float>* undef, align 16 %tmp1084 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1085 = load <4 x float>, <4 x float>* undef, align 16 %vecext11093 = extractelement <4 x float> %tmp1085, i32 2 %add11094 = fadd float %vecext11093, 0xC07CD66660000000 %tmp1086 = load <4 x float>, <4 x float>* undef, align 16 %vecins11095 = insertelement <4 x float> %tmp1086, float %add11094, i32 2 - store volatile <4 x float> %vecins11095, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins11095, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1087 = load <4 x float>, <4 x float>* undef - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1088 = load <4 x float>, <4 x float>* undef, align 16 %vecext11513 = extractelement <4 x float> %tmp1088, i32 2 %add11514 = fadd float %vecext11513, 0xC07C7199A0000000 %vecins11515 = insertelement <4 x float> undef, float %add11514, i32 2 - store volatile <4 x float> %vecins11515, <4 x float>* undef, align 16 + store <4 x float> %vecins11515, <4 x float>* undef, align 16 %add11520 = fadd <4 x float> undef, undef - store volatile <4 x float> %add11520, <4 x float>* undef, align 16 + store <4 x float> %add11520, <4 x float>* undef, align 16 %vecext11521 = extractelement <4 x float> undef, i32 0 %add11522 = fadd float %vecext11521, 0x4041733340000000 %tmp1089 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1090 = load <4 x float>, <4 x float>* undef %tmp1091 = load <4 x float>, <4 x float>* undef, align 16 %add11562 = fadd <4 x float> %tmp1091, %tmp1090 %tmp1092 = load <4 x float>, <4 x float>* undef, align 16 - %add11564 = fadd float %val, 0xC0411999A0000000 + %add11564 = fadd float undef, 0xC0411999A0000000 %tmp1093 = load <4 x float>, <4 x float>* undef, align 16 %vecins11565 = insertelement <4 x float> %tmp1093, float %add11564, i32 0 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext11586 = extractelement <4 x float> undef, i32 3 %add11587 = fadd float %vecext11586, 3.760000e+02 %tmp1094 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1095 = load <4 x float>, <4 x float>* undef %tmp1096 = load <4 x float>, <4 x float>* undef, align 16 %tmp1097 = load <4 x float>, <4 x float>* undef, align 16 %tmp1098 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11593 = insertelement <4 x float> %tmp1098, float %val, i32 0 + %vecins11593 = insertelement <4 x float> %tmp1098, float undef, i32 0 %vecext11594 = extractelement <4 x float> undef, i32 1 %tmp1099 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11596 = insertelement <4 x float> %tmp1099, float %val, i32 1 - store volatile <4 x float> %vecins11596, <4 x float>* undef, align 16 + %vecins11596 = insertelement <4 x float> %tmp1099, float undef, i32 1 + store <4 x float> %vecins11596, <4 x float>* undef, align 16 %tmp1100 = load <4 x float>, <4 x float>* undef, align 16 %vecext11597 = extractelement <4 x float> %tmp1100, i32 2 %add11598 = fadd float %vecext11597, 0x40430CCCC0000000 @@ -6426,34 +6426,34 @@ entry: %tmp1102 = load <4 x float>, <4 x float>* undef, align 16 %vecext11600 = extractelement <4 x float> %tmp1102, i32 3 %tmp1103 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11602 = insertelement <4 x float> %tmp1103, float %val, i32 3 - store volatile <4 x float> %vecins11602, <4 x float>* undef, align 16 + %vecins11602 = insertelement <4 x float> %tmp1103, float undef, i32 3 + store <4 x float> %vecins11602, <4 x float>* undef, align 16 %tmp1104 = load <4 x float>, <4 x float>* undef %tmp1105 = load <4 x float>, <4 x float>* undef, align 16 %add11604 = fadd <4 x float> %tmp1105, %tmp1104 %tmp1106 = load <4 x float>, <4 x float>* undef, align 16 %vecext11605 = extractelement <4 x float> %tmp1106, i32 0 %tmp1107 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11607 = insertelement <4 x float> %tmp1107, float %val, i32 0 - %vecins11621 = insertelement <4 x float> undef, float %val, i32 0 - %vecins11630 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins11630, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral11631 + %vecins11607 = insertelement <4 x float> %tmp1107, float undef, i32 0 + %vecins11621 = insertelement <4 x float> undef, float undef, i32 0 + %vecins11630 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins11630, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral11631 %tmp1108 = load <4 x float>, <4 x float>* %.compoundliteral11631 %tmp1109 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add11634 = fadd float %val, -1.075000e+02 + store <4 x float> undef, <4 x float>* undef, align 16 + %add11634 = fadd float undef, -1.075000e+02 %vecext11647 = extractelement <4 x float> undef, i32 0 %add11648 = fadd float %vecext11647, 0x40775999A0000000 %tmp1110 = load <4 x float>, <4 x float>* undef, align 16 %vecext11650 = extractelement <4 x float> undef, i32 1 %tmp1111 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11784 = insertelement <4 x float> %tmp1111, float %val, i32 3 - store volatile <4 x float> %vecins11784, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral11785 + %vecins11784 = insertelement <4 x float> %tmp1111, float undef, i32 3 + store <4 x float> %vecins11784, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral11785 %tmp1112 = load <4 x float>, <4 x float>* %.compoundliteral11785 %add11786 = fadd <4 x float> undef, %tmp1112 - store volatile <4 x float> %add11786, <4 x float>* undef, align 16 + store <4 x float> %add11786, <4 x float>* undef, align 16 %tmp1113 = load <4 x float>, <4 x float>* undef, align 16 %vecext11787 = extractelement <4 x float> %tmp1113, i32 0 %vecext11807 = extractelement <4 x float> undef, i32 2 @@ -6463,60 +6463,60 @@ entry: %add11811 = fadd float %vecext11810, 0x4068F66660000000 %tmp1115 = load <4 x float>, <4 x float>* undef, align 16 %vecins11812 = insertelement <4 x float> %tmp1115, float %add11811, i32 3 - store volatile <4 x float> %vecins11812, <4 x float>* undef, align 16 + store <4 x float> %vecins11812, <4 x float>* undef, align 16 %tmp1116 = load <4 x float>, <4 x float>* undef %tmp1117 = load <4 x float>, <4 x float>* undef, align 16 %vecext11958 = extractelement <4 x float> undef, i32 1 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext11961 = extractelement <4 x float> undef, i32 2 %add11962 = fadd float %vecext11961, -3.680000e+02 %tmp1118 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add11965 = fadd float %val, 0x4061133340000000 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 + %add11965 = fadd float undef, 0x4061133340000000 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1119 = load <4 x float>, <4 x float>* undef, align 16 %vecext11975 = extractelement <4 x float> %tmp1119, i32 2 %tmp1120 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11977 = insertelement <4 x float> %tmp1120, float %val, i32 2 - store volatile <4 x float> %vecins11977, <4 x float>* undef, align 16 + %vecins11977 = insertelement <4 x float> %tmp1120, float undef, i32 2 + store <4 x float> %vecins11977, <4 x float>* undef, align 16 %vecext11978 = extractelement <4 x float> undef, i32 3 %add11979 = fadd float %vecext11978, 0xC0688999A0000000 %tmp1121 = load <4 x float>, <4 x float>* undef, align 16 %vecins11980 = insertelement <4 x float> %tmp1121, float %add11979, i32 3 - store volatile <4 x float> %vecins11980, <4 x float>* undef, align 16 + store <4 x float> %vecins11980, <4 x float>* undef, align 16 %add11982 = fadd <4 x float> undef, undef - store volatile <4 x float> %add11982, <4 x float>* undef, align 16 + store <4 x float> %add11982, <4 x float>* undef, align 16 %tmp1122 = load <4 x float>, <4 x float>* undef, align 16 %vecext11983 = extractelement <4 x float> %tmp1122, i32 0 %add11984 = fadd float %vecext11983, 0xC075966660000000 %tmp1123 = load <4 x float>, <4 x float>* undef, align 16 - %vecins12005 = insertelement <4 x float> undef, float %val, i32 2 - store volatile <4 x float> %vecins12005, <4 x float>* undef, align 16 + %vecins12005 = insertelement <4 x float> undef, float undef, i32 2 + store <4 x float> %vecins12005, <4 x float>* undef, align 16 %tmp1124 = load <4 x float>, <4 x float>* undef, align 16 - %add12007 = fadd float %val, 0xC07124CCC0000000 + %add12007 = fadd float undef, 0xC07124CCC0000000 %vecins12008 = insertelement <4 x float> undef, float %add12007, i32 3 - store volatile <4 x float> %vecins12008, <4 x float>* undef, align 16 + store <4 x float> %vecins12008, <4 x float>* undef, align 16 %tmp1125 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1126 = load <4 x float>, <4 x float>* undef, align 16 - %add12012 = fadd float %val, 0xC0750CCCC0000000 + %add12012 = fadd float undef, 0xC0750CCCC0000000 %tmp1127 = load <4 x float>, <4 x float>* undef, align 16 %vecins12013 = insertelement <4 x float> %tmp1127, float %add12012, i32 0 - store volatile <4 x float> %vecins12013, <4 x float>* undef, align 16 + store <4 x float> %vecins12013, <4 x float>* undef, align 16 %tmp1128 = load <4 x float>, <4 x float>* undef, align 16 - %add12015 = fadd float %val, 0x4079CE6660000000 + %add12015 = fadd float undef, 0x4079CE6660000000 %tmp1129 = load <4 x float>, <4 x float>* undef, align 16 %vecins12016 = insertelement <4 x float> %tmp1129, float %add12015, i32 1 - store volatile <4 x float> %vecins12016, <4 x float>* undef, align 16 - %add12018 = fadd float %val, 3.555000e+02 + store <4 x float> %vecins12016, <4 x float>* undef, align 16 + %add12018 = fadd float undef, 3.555000e+02 %tmp1130 = load <4 x float>, <4 x float>* undef, align 16 %vecins12019 = insertelement <4 x float> %tmp1130, float %add12018, i32 2 %tmp1131 = load <4 x float>, <4 x float>* undef, align 16 %vecext12020 = extractelement <4 x float> %tmp1131, i32 3 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext12028 = extractelement <4 x float> undef, i32 1 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1132 = load <4 x float>, <4 x float>* undef, align 16 %add12038 = fadd <4 x float> %tmp1132, undef %tmp1133 = load <4 x float>, <4 x float>* undef, align 16 @@ -6524,27 +6524,27 @@ entry: %add12043 = fadd float %vecext12042, 0x402F9999A0000000 %tmp1134 = load <4 x float>, <4 x float>* undef, align 16 %vecins12044 = insertelement <4 x float> %tmp1134, float %add12043, i32 1 - store volatile <4 x float> %vecins12044, <4 x float>* undef, align 16 + store <4 x float> %vecins12044, <4 x float>* undef, align 16 %vecext12045 = extractelement <4 x float> undef, i32 2 %add12046 = fadd float %vecext12045, 0xC07EF33340000000 %tmp1135 = load <4 x float>, <4 x float>* undef, align 16 %vecins12047 = insertelement <4 x float> %tmp1135, float %add12046, i32 2 - store volatile <4 x float> %vecins12047, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> %vecins12047, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1136 = load <4 x float>, <4 x float>* undef, align 16 %vecext12112 = extractelement <4 x float> %tmp1136, i32 1 %tmp1137 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add12116 = fadd float %val, 0xC074F4CCC0000000 + store <4 x float> undef, <4 x float>* undef, align 16 + %add12116 = fadd float undef, 0xC074F4CCC0000000 %tmp1138 = load <4 x float>, <4 x float>* undef, align 16 %vecins12117 = insertelement <4 x float> %tmp1138, float %add12116, i32 2 - store volatile <4 x float> %vecins12117, <4 x float>* undef, align 16 + store <4 x float> %vecins12117, <4 x float>* undef, align 16 %tmp1139 = load <4 x float>, <4 x float>* undef, align 16 %vecext12118 = extractelement <4 x float> %tmp1139, i32 3 %add12119 = fadd float %vecext12118, 0xC0638CCCC0000000 %tmp1140 = load <4 x float>, <4 x float>* undef, align 16 %vecins12120 = insertelement <4 x float> %tmp1140, float %add12119, i32 3 - %add12152 = fadd float %val, 0x4039333340000000 + %add12152 = fadd float undef, 0x4039333340000000 %tmp1141 = load <4 x float>, <4 x float>* undef, align 16 %vecins12153 = insertelement <4 x float> %tmp1141, float %add12152, i32 0 %vecext12154 = extractelement <4 x float> undef, i32 1 @@ -6561,67 +6561,67 @@ entry: %add12161 = fadd float %vecext12160, 0x407B1999A0000000 %tmp1146 = load <4 x float>, <4 x float>* undef, align 16 %vecins12162 = insertelement <4 x float> %tmp1146, float %add12161, i32 3 - store volatile <4 x float> %vecins12162, <4 x float>* undef, align 16 + store <4 x float> %vecins12162, <4 x float>* undef, align 16 %tmp1147 = load <4 x float>, <4 x float>* undef %tmp1148 = load <4 x float>, <4 x float>* undef, align 16 %tmp1149 = load <4 x float>, <4 x float>* undef, align 16 %vecext12182 = extractelement <4 x float> %tmp1149, i32 1 %tmp1150 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef - %add12208 = fadd float %val, 0x407854CCC0000000 + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef + %add12208 = fadd float undef, 0x407854CCC0000000 %tmp1151 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1152 = load <4 x float>, <4 x float>* undef, align 16 %tmp1153 = load <4 x float>, <4 x float>* undef, align 16 - %vecins12218 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins12218, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins12218 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins12218, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1154 = load <4 x float>, <4 x float>* undef %tmp1155 = load <4 x float>, <4 x float>* undef, align 16 %add12220 = fadd <4 x float> %tmp1155, %tmp1154 %tmp1156 = load <4 x float>, <4 x float>* undef, align 16 %tmp1157 = load <4 x float>, <4 x float>* undef, align 16 - %vecins12223 = insertelement <4 x float> %tmp1157, float %val, i32 0 - store volatile <4 x float> %vecins12223, <4 x float>* undef, align 16 + %vecins12223 = insertelement <4 x float> %tmp1157, float undef, i32 0 + store <4 x float> %vecins12223, <4 x float>* undef, align 16 %tmp1158 = load <4 x float>, <4 x float>* undef, align 16 - %add12242 = fadd float %val, 0x4067E33340000000 + %add12242 = fadd float undef, 0x4067E33340000000 %tmp1159 = load <4 x float>, <4 x float>* undef, align 16 %vecins12243 = insertelement <4 x float> %tmp1159, float %add12242, i32 2 - store volatile <4 x float> %vecins12243, <4 x float>* undef, align 16 + store <4 x float> %vecins12243, <4 x float>* undef, align 16 %tmp1160 = load <4 x float>, <4 x float>* undef, align 16 %vecext12244 = extractelement <4 x float> %tmp1160, i32 3 %add12245 = fadd float %vecext12244, 0x4071AE6660000000 %tmp1161 = load <4 x float>, <4 x float>* undef, align 16 %vecins12246 = insertelement <4 x float> %tmp1161, float %add12245, i32 3 - store volatile <4 x float> %vecins12246, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral12247 + store <4 x float> %vecins12246, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral12247 %tmp1162 = load <4 x float>, <4 x float>* %.compoundliteral12247 %tmp1163 = load <4 x float>, <4 x float>* undef, align 16 %add12248 = fadd <4 x float> %tmp1163, %tmp1162 - store volatile <4 x float> %add12248, <4 x float>* undef, align 16 + store <4 x float> %add12248, <4 x float>* undef, align 16 %tmp1164 = load <4 x float>, <4 x float>* undef, align 16 %vecext12249 = extractelement <4 x float> %tmp1164, i32 0 %add12250 = fadd float %vecext12249, 1.075000e+02 %tmp1165 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1166 = load <4 x float>, <4 x float>* undef, align 16 %vecext12252 = extractelement <4 x float> %tmp1166, i32 1 %add12253 = fadd float %vecext12252, 0xC0662CCCC0000000 %tmp1167 = load <4 x float>, <4 x float>* undef, align 16 %vecins12254 = insertelement <4 x float> %tmp1167, float %add12253, i32 1 - store volatile <4 x float> %vecins12254, <4 x float>* undef, align 16 + store <4 x float> %vecins12254, <4 x float>* undef, align 16 %tmp1168 = load <4 x float>, <4 x float>* undef, align 16 %vecext12255 = extractelement <4 x float> %tmp1168, i32 2 %add12256 = fadd float %vecext12255, 0x40554CCCC0000000 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add13141 = fadd float %val, 0x40768999A0000000 + store <4 x float> undef, <4 x float>* undef, align 16 + %add13141 = fadd float undef, 0x40768999A0000000 %tmp1169 = load <4 x float>, <4 x float>* undef, align 16 %vecins13142 = insertelement <4 x float> %tmp1169, float %add13141, i32 3 - store volatile <4 x float> %vecins13142, <4 x float>* undef, align 16 + store <4 x float> %vecins13142, <4 x float>* undef, align 16 %tmp1170 = load <4 x float>, <4 x float>* undef %add13144 = fadd <4 x float> undef, %tmp1170 - store volatile <4 x float> %add13144, <4 x float>* undef, align 16 + store <4 x float> %add13144, <4 x float>* undef, align 16 %tmp1171 = load <4 x float>, <4 x float>* undef, align 16 %vecext13145 = extractelement <4 x float> %tmp1171, i32 0 %add13146 = fadd float %vecext13145, 3.975000e+02 @@ -6630,137 +6630,137 @@ entry: %add13379 = fadd float %vecext13378, 0xC053B33340000000 %tmp1173 = load <4 x float>, <4 x float>* undef, align 16 %vecins13380 = insertelement <4 x float> %tmp1173, float %add13379, i32 3 - store volatile <4 x float> %vecins13380, <4 x float>* undef, align 16 + store <4 x float> %vecins13380, <4 x float>* undef, align 16 %tmp1174 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13408 = insertelement <4 x float> %tmp1174, float %val, i32 3 - store volatile <4 x float> %vecins13408, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins13408 = insertelement <4 x float> %tmp1174, float undef, i32 3 + store <4 x float> %vecins13408, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1175 = load <4 x float>, <4 x float>* undef %tmp1176 = load <4 x float>, <4 x float>* undef, align 16 %add13410 = fadd <4 x float> %tmp1176, %tmp1175 - store volatile <4 x float> %add13410, <4 x float>* undef, align 16 + store <4 x float> %add13410, <4 x float>* undef, align 16 %tmp1177 = load <4 x float>, <4 x float>* undef, align 16 - %add13412 = fadd float %val, 0xC0708999A0000000 + %add13412 = fadd float undef, 0xC0708999A0000000 %tmp1178 = load <4 x float>, <4 x float>* undef, align 16 %vecins13413 = insertelement <4 x float> %tmp1178, float %add13412, i32 0 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext13428 = extractelement <4 x float> undef, i32 1 %add13429 = fadd float %vecext13428, 0xC063BCCCC0000000 %tmp1179 = load <4 x float>, <4 x float>* undef, align 16 %vecins13430 = insertelement <4 x float> %tmp1179, float %add13429, i32 1 - store volatile <4 x float> %vecins13430, <4 x float>* undef, align 16 + store <4 x float> %vecins13430, <4 x float>* undef, align 16 %tmp1180 = load <4 x float>, <4 x float>* undef, align 16 %vecext13431 = extractelement <4 x float> %tmp1180, i32 2 - %vecins13433 = insertelement <4 x float> undef, float %val, i32 2 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add13449 = fadd float %val, 4.590000e+02 + %vecins13433 = insertelement <4 x float> undef, float undef, i32 2 + store <4 x float> undef, <4 x float>* undef, align 16 + %add13449 = fadd float undef, 4.590000e+02 %tmp1181 = load <4 x float>, <4 x float>* undef, align 16 %vecins13450 = insertelement <4 x float> %tmp1181, float %add13449, i32 3 - store volatile <4 x float> %vecins13450, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13450, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1182 = load <4 x float>, <4 x float>* undef %tmp1183 = load <4 x float>, <4 x float>* undef, align 16 %add13452 = fadd <4 x float> %tmp1183, %tmp1182 - store volatile <4 x float> %add13452, <4 x float>* undef, align 16 + store <4 x float> %add13452, <4 x float>* undef, align 16 %tmp1184 = load <4 x float>, <4 x float>* undef, align 16 %vecext13453 = extractelement <4 x float> %tmp1184, i32 0 %add13454 = fadd float %vecext13453, 0xC072866660000000 %tmp1185 = load <4 x float>, <4 x float>* undef, align 16 %vecins13455 = insertelement <4 x float> %tmp1185, float %add13454, i32 0 - %add13471 = fadd float %val, 0xC0556CCCC0000000 + %add13471 = fadd float undef, 0xC0556CCCC0000000 %tmp1186 = load <4 x float>, <4 x float>* undef, align 16 %vecins13472 = insertelement <4 x float> %tmp1186, float %add13471, i32 1 - store volatile <4 x float> %vecins13472, <4 x float>* undef, align 16 + store <4 x float> %vecins13472, <4 x float>* undef, align 16 %tmp1187 = load <4 x float>, <4 x float>* undef, align 16 %vecext13473 = extractelement <4 x float> %tmp1187, i32 2 %add13474 = fadd float %vecext13473, 0xC0786999A0000000 %tmp1188 = load <4 x float>, <4 x float>* undef, align 16 %vecins13475 = insertelement <4 x float> %tmp1188, float %add13474, i32 2 - store volatile <4 x float> %vecins13475, <4 x float>* undef, align 16 - %add13477 = fadd float %val, 0xC07C3E6660000000 + store <4 x float> %vecins13475, <4 x float>* undef, align 16 + %add13477 = fadd float undef, 0xC07C3E6660000000 %tmp1189 = load <4 x float>, <4 x float>* undef, align 16 %vecins13478 = insertelement <4 x float> %tmp1189, float %add13477, i32 3 - store volatile <4 x float> %vecins13478, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13478, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1190 = load <4 x float>, <4 x float>* undef, align 16 %add13480 = fadd <4 x float> %tmp1190, undef - store volatile <4 x float> %add13480, <4 x float>* undef, align 16 + store <4 x float> %add13480, <4 x float>* undef, align 16 %tmp1191 = load <4 x float>, <4 x float>* undef, align 16 %vecext13481 = extractelement <4 x float> %tmp1191, i32 0 %add13482 = fadd float %vecext13481, 0xC07BA4CCC0000000 %tmp1192 = load <4 x float>, <4 x float>* undef, align 16 %vecins13483 = insertelement <4 x float> %tmp1192, float %add13482, i32 0 - store volatile <4 x float> %vecins13483, <4 x float>* undef, align 16 + store <4 x float> %vecins13483, <4 x float>* undef, align 16 %tmp1193 = load <4 x float>, <4 x float>* undef, align 16 - %add13485 = fadd float %val, 0x406B1999A0000000 + %add13485 = fadd float undef, 0x406B1999A0000000 %tmp1194 = load <4 x float>, <4 x float>* undef, align 16 %vecins13486 = insertelement <4 x float> %tmp1194, float %add13485, i32 1 - store volatile <4 x float> %vecins13486, <4 x float>* undef, align 16 + store <4 x float> %vecins13486, <4 x float>* undef, align 16 %tmp1195 = load <4 x float>, <4 x float>* undef, align 16 %vecext13487 = extractelement <4 x float> %tmp1195, i32 2 %add13488 = fadd float %vecext13487, 0x40647999A0000000 %tmp1196 = load <4 x float>, <4 x float>* undef, align 16 %vecins13489 = insertelement <4 x float> %tmp1196, float %add13488, i32 2 - store volatile <4 x float> %vecins13489, <4 x float>* undef, align 16 + store <4 x float> %vecins13489, <4 x float>* undef, align 16 %tmp1197 = load <4 x float>, <4 x float>* undef, align 16 %vecext13490 = extractelement <4 x float> %tmp1197, i32 3 %tmp1198 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13492 = insertelement <4 x float> %tmp1198, float %val, i32 3 - store volatile <4 x float> %vecins13492, <4 x float>* undef, align 16 + %vecins13492 = insertelement <4 x float> %tmp1198, float undef, i32 3 + store <4 x float> %vecins13492, <4 x float>* undef, align 16 %tmp1199 = load <4 x float>, <4 x float>* %.compoundliteral13493 %tmp1200 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %vecins13548 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> , <4 x float>* %.compoundliteral13549 + store <4 x float> undef, <4 x float>* undef, align 16 + %vecins13548 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> , <4 x float>* %.compoundliteral13549 %tmp1201 = load <4 x float>, <4 x float>* undef, align 16 - %add13552 = fadd float %val, 3.230000e+02 + %add13552 = fadd float undef, 3.230000e+02 %tmp1202 = load <4 x float>, <4 x float>* undef, align 16 %vecins13553 = insertelement <4 x float> %tmp1202, float %add13552, i32 0 %tmp1203 = load <4 x float>, <4 x float>* undef, align 16 %vecext13554 = extractelement <4 x float> %tmp1203, i32 1 %tmp1204 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13556 = insertelement <4 x float> %tmp1204, float %val, i32 1 - store volatile <4 x float> %vecins13556, <4 x float>* undef, align 16 + %vecins13556 = insertelement <4 x float> %tmp1204, float undef, i32 1 + store <4 x float> %vecins13556, <4 x float>* undef, align 16 %tmp1205 = load <4 x float>, <4 x float>* undef, align 16 - %add13558 = fadd float %val, 2.625000e+02 + %add13558 = fadd float undef, 2.625000e+02 %tmp1206 = load <4 x float>, <4 x float>* undef, align 16 %vecins13559 = insertelement <4 x float> %tmp1206, float %add13558, i32 2 - store volatile <4 x float> %vecins13559, <4 x float>* undef, align 16 - %add13575 = fadd float %val, -4.725000e+02 + store <4 x float> %vecins13559, <4 x float>* undef, align 16 + %add13575 = fadd float undef, -4.725000e+02 %tmp1207 = load <4 x float>, <4 x float>* undef, align 16 %vecins13576 = insertelement <4 x float> %tmp1207, float %add13575, i32 3 - store volatile <4 x float> %vecins13576, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13576, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1208 = load <4 x float>, <4 x float>* undef %tmp1209 = load <4 x float>, <4 x float>* undef, align 16 %add13578 = fadd <4 x float> %tmp1209, %tmp1208 - store volatile <4 x float> %add13578, <4 x float>* undef, align 16 + store <4 x float> %add13578, <4 x float>* undef, align 16 %tmp1210 = load <4 x float>, <4 x float>* undef, align 16 %tmp1211 = load <4 x float>, <4 x float>* undef, align 16 %add13592 = fadd <4 x float> %tmp1211, undef - store volatile <4 x float> %add13592, <4 x float>* undef, align 16 + store <4 x float> %add13592, <4 x float>* undef, align 16 %tmp1212 = load <4 x float>, <4 x float>* undef, align 16 %vecext13593 = extractelement <4 x float> %tmp1212, i32 0 %add13594 = fadd float %vecext13593, 0xC0708B3340000000 %tmp1213 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1214 = load <4 x float>, <4 x float>* undef, align 16 %vecext13596 = extractelement <4 x float> %tmp1214, i32 1 %add13597 = fadd float %vecext13596, 0x40660999A0000000 - %vecins13604 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins13604, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins13604 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins13604, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1215 = load <4 x float>, <4 x float>* undef, align 16 %add13606 = fadd <4 x float> %tmp1215, undef %tmp1216 = load <4 x float>, <4 x float>* undef, align 16 %vecext13607 = extractelement <4 x float> %tmp1216, i32 0 - %vecins13609 = insertelement <4 x float> undef, float %val, i32 0 + %vecins13609 = insertelement <4 x float> undef, float undef, i32 0 %tmp1217 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1218 = load <4 x float>, <4 x float>* undef, align 16 - %add13622 = fadd float %val, -3.390000e+02 + %add13622 = fadd float undef, -3.390000e+02 %vecins13623 = insertelement <4 x float> undef, float %add13622, i32 0 - store volatile <4 x float> %vecins13623, <4 x float>* undef, align 16 + store <4 x float> %vecins13623, <4 x float>* undef, align 16 %tmp1219 = load <4 x float>, <4 x float>* undef, align 16 %vecext13624 = extractelement <4 x float> %tmp1219, i32 1 %add13625 = fadd float %vecext13624, 0x405C3999A0000000 @@ -6772,41 +6772,41 @@ entry: %add13631 = fadd float %vecext13630, 0xC060333340000000 %tmp1222 = load <4 x float>, <4 x float>* undef, align 16 %vecins13632 = insertelement <4 x float> %tmp1222, float %add13631, i32 3 - store volatile <4 x float> %vecins13632, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13632, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1223 = load <4 x float>, <4 x float>* undef %tmp1224 = load <4 x float>, <4 x float>* undef, align 16 %add13634 = fadd <4 x float> %tmp1224, %tmp1223 - store volatile <4 x float> %add13634, <4 x float>* undef, align 16 + store <4 x float> %add13634, <4 x float>* undef, align 16 %vecext13635 = extractelement <4 x float> undef, i32 0 %add13636 = fadd float %vecext13635, 0x406A5999A0000000 %tmp1225 = load <4 x float>, <4 x float>* undef, align 16 %vecins13637 = insertelement <4 x float> %tmp1225, float %add13636, i32 0 - store volatile <4 x float> %vecins13637, <4 x float>* undef, align 16 + store <4 x float> %vecins13637, <4 x float>* undef, align 16 %tmp1226 = load <4 x float>, <4 x float>* undef, align 16 %tmp1227 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13643 = insertelement <4 x float> %tmp1227, float %val, i32 2 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + %vecins13643 = insertelement <4 x float> %tmp1227, float undef, i32 2 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1228 = load <4 x float>, <4 x float>* undef, align 16 - %add13785 = fadd float %val, 0x4068866660000000 + %add13785 = fadd float undef, 0x4068866660000000 %tmp1229 = load <4 x float>, <4 x float>* undef, align 16 %vecins13786 = insertelement <4 x float> %tmp1229, float %add13785, i32 3 - store volatile <4 x float> %vecins13786, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral13787 + store <4 x float> %vecins13786, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral13787 %tmp1230 = load <4 x float>, <4 x float>* undef, align 16 %add13788 = fadd <4 x float> %tmp1230, undef %tmp1231 = load <4 x float>, <4 x float>* undef %tmp1232 = load <4 x float>, <4 x float>* undef, align 16 %add13802 = fadd <4 x float> %tmp1232, %tmp1231 - store volatile <4 x float> %add13802, <4 x float>* undef, align 16 + store <4 x float> %add13802, <4 x float>* undef, align 16 %tmp1233 = load <4 x float>, <4 x float>* undef, align 16 %vecext13803 = extractelement <4 x float> %tmp1233, i32 0 %add13804 = fadd float %vecext13803, -2.900000e+01 %tmp1234 = load <4 x float>, <4 x float>* undef, align 16 %vecins13805 = insertelement <4 x float> %tmp1234, float %add13804, i32 0 - store volatile <4 x float> %vecins13805, <4 x float>* undef, align 16 + store <4 x float> %vecins13805, <4 x float>* undef, align 16 %tmp1235 = load <4 x float>, <4 x float>* undef, align 16 - %add13807 = fadd float %val, 6.400000e+01 + %add13807 = fadd float undef, 6.400000e+01 %tmp1236 = load <4 x float>, <4 x float>* undef, align 16 %tmp1237 = load <4 x float>, <4 x float>* undef, align 16 %vecext13809 = extractelement <4 x float> %tmp1237, i32 2 @@ -6814,28 +6814,28 @@ entry: %vecext13812 = extractelement <4 x float> %tmp1238, i32 3 %add13813 = fadd float %vecext13812, -3.615000e+02 %vecins13814 = insertelement <4 x float> undef, float %add13813, i32 3 - store volatile <4 x float> %vecins13814, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13814, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1239 = load <4 x float>, <4 x float>* undef - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1240 = load <4 x float>, <4 x float>* undef, align 16 %vecext13817 = extractelement <4 x float> %tmp1240, i32 0 - %vecins13856 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins13856, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins13856 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins13856, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1241 = load <4 x float>, <4 x float>* undef %tmp1242 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1243 = load <4 x float>, <4 x float>* undef, align 16 %vecext13859 = extractelement <4 x float> %tmp1243, i32 0 %tmp1244 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13861 = insertelement <4 x float> %tmp1244, float %val, i32 0 + %vecins13861 = insertelement <4 x float> %tmp1244, float undef, i32 0 %tmp1245 = load <4 x float>, <4 x float>* undef, align 16 %vecext13862 = extractelement <4 x float> %tmp1245, i32 1 %add13863 = fadd float %vecext13862, -1.380000e+02 %vecins13864 = insertelement <4 x float> undef, float %add13863, i32 1 - %vecins13867 = insertelement <4 x float> undef, float %val, i32 2 - store volatile <4 x float> %vecins13867, <4 x float>* undef, align 16 + %vecins13867 = insertelement <4 x float> undef, float undef, i32 2 + store <4 x float> %vecins13867, <4 x float>* undef, align 16 %tmp1246 = load <4 x float>, <4 x float>* undef, align 16 %tmp1247 = load <4 x float>, <4 x float>* undef, align 16 ret <4 x float> undef diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll index 546ffdd66ff8e..c0c691fc5b0a6 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll +++ b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll @@ -1,13 +1,13 @@ ; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s ; CHECK-LABEL: danny: -; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0) -; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2) -; CHECK: [[T0]] |= asl([[T1]],#16) -; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4) -; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6) -; CHECK: [[T2]] |= asl([[T3]],#16) -; CHECK: combine([[T2]],[[T0]]) +; CHECK: r1 = r0 +; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r1+#0) +; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r1+#2) +; CHECK: r2 |= asl([[T1]],#16) +; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r1+#4) +; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r1+#6) +; CHECK: r1 |= asl([[T3]],#16) define <4 x i16> @danny(<4 x i16>* %p) { %t0 = load <4 x i16>, <4 x i16>* %p, align 2 ret <4 x i16> %t0 @@ -15,8 +15,8 @@ define <4 x i16> @danny(<4 x i16>* %p) { ; CHECK-LABEL: sammy: ; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0) -; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4) -; CHECK: combine([[T1]],[[T0]]) +; CHECK-DAG: r1 = memw(r0+#4) +; CHECK: r0 = [[T0]] define <4 x i16> @sammy(<4 x i16>* %p) { %t0 = load <4 x i16>, <4 x i16>* %p, align 4 ret <4 x i16> %t0 diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll index 9025af913ad7b..f22fbcc7b73ee 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll @@ -244,12 +244,12 @@ define void @cxiiiiconv() { ; ALL-DAG: lw $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]]) ; ALL-DAG: lbu $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]]) ; 32R1-DAG: sll $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24 - ; 32R1-DAG: sra $5, $[[REG_C1_1]], 24 - ; 32R2-DAG: seb $5, $[[REG_C1]] + ; 32R1-DAG: sra $4, $[[REG_C1_1]], 24 + ; 32R2-DAG: seb $4, $[[REG_C1]] ; FIXME: andi is superfulous ; ALL-DAG: lw $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]]) ; ALL-DAG: lbu $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]]) - ; ALL-DAG: andi $4, $[[REG_UC1]], 255 + ; ALL-DAG: andi $5, $[[REG_UC1]], 255 ; ALL-DAG: lw $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]]) ; ALL-DAG: lhu $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]]) ; 32R1-DAG: sll $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16 diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll index 3e30f75d6f4ae..1a2ad44b0a6b1 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll @@ -17,15 +17,9 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) define void @cpy(i8* %src, i32 %i) { ; ALL-LABEL: cpy: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 24($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: sw $5, 20($sp) - ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) - ; ALL-DAG: move $5, $[[T1]] - ; ALL-DAG: lw $6, 20($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}}) - ; ALL: jalr $[[T2]] + ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}}) + ; ALL: jalr $[[T2]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false) @@ -36,14 +30,8 @@ define void @mov(i8* %src, i32 %i) { ; ALL-LABEL: mov: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 24($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: sw $5, 20($sp) - ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) - ; ALL-DAG: move $5, $[[T1]] - ; ALL-DAG: lw $6, 20($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}}) + ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}}) ; ALL: jalr $[[T2]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} @@ -54,15 +42,8 @@ define void @mov(i8* %src, i32 %i) { define void @clear(i32 %i) { ; ALL-LABEL: clear: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 16($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 42 - ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T1]], 24 - ; 32R1-DAG: sra $5, $[[T2]], 24 - ; 32R2-DAG: seb $5, $[[T1]] - ; ALL-DAG: lw $6, 16($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}}) + ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}}) ; ALL: jalr $[[T2]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll b/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll index 6befe70270dff..e482a13f3d5cb 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll @@ -5,9 +5,10 @@ define void @test(i32 %x, i1* %p) nounwind { ; CHECK-LABEL: test: ; CHECK: # %bb.0: ; CHECK-NEXT: move $1, $4 -; CHECK-NEXT: andi $2, $4, 1 -; CHECK-NEXT: sb $2, 0($5) +; CHECK-NEXT: move $4, $1 ; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: sb $1, 0($5) +; CHECK-NEXT: andi $1, $4, 1 ; CHECK-NEXT: bgtz $1, $BB0_1 ; CHECK-NEXT: nop ; CHECK-NEXT: # %bb.1: # %foo diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll index 0b217b837479d..fe26837d0a344 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll @@ -86,12 +86,11 @@ entry: define i64 @add_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: add_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $1, $6, $4 -; MIPS32-NEXT: sltu $2, $1, $4 -; MIPS32-NEXT: addu $3, $7, $5 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: addu $3, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: addu $2, $6, $4 +; MIPS32-NEXT: sltu $3, $2, $4 +; MIPS32-NEXT: addu $1, $7, $5 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: addu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -102,34 +101,30 @@ entry: define i128 @add_i128(i128 %a, i128 %b) { ; MIPS32-LABEL: add_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $sp, $sp, -8 -; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: move $8, $4 +; MIPS32-NEXT: move $3, $5 +; MIPS32-NEXT: move $4, $6 +; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $2, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $6, 0($1) ; MIPS32-NEXT: addiu $1, $sp, 24 +; MIPS32-NEXT: lw $5, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 28 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 32 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: addiu $8, $sp, 36 -; MIPS32-NEXT: lw $8, 0($8) -; MIPS32-NEXT: addu $1, $1, $4 -; MIPS32-NEXT: sltu $4, $1, $4 -; MIPS32-NEXT: addu $5, $2, $5 -; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: addu $2, $2, $8 +; MIPS32-NEXT: sltu $8, $2, $8 +; MIPS32-NEXT: addu $3, $6, $3 +; MIPS32-NEXT: andi $8, $8, 1 +; MIPS32-NEXT: addu $3, $3, $8 +; MIPS32-NEXT: sltu $6, $3, $6 ; MIPS32-NEXT: addu $4, $5, $4 -; MIPS32-NEXT: sltu $2, $4, $2 -; MIPS32-NEXT: addu $5, $3, $6 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: addu $2, $5, $2 -; MIPS32-NEXT: sltu $3, $2, $3 -; MIPS32-NEXT: addu $5, $8, $7 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: addu $5, $5, $3 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: move $2, $1 -; MIPS32-NEXT: move $3, $4 -; MIPS32-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: addiu $sp, $sp, 8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: addu $4, $4, $6 +; MIPS32-NEXT: sltu $5, $4, $5 +; MIPS32-NEXT: addu $1, $1, $7 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: addu $5, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll index 5d8585173b995..74ecbf6ed7a84 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll @@ -4,9 +4,9 @@ define void @add_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) { ; P5600-LABEL: add_v16i8: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.b $w0, 0($4) -; P5600-NEXT: ld.b $w1, 0($5) -; P5600-NEXT: addv.b $w0, $w1, $w0 +; P5600-NEXT: ld.b $w1, 0($4) +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: addv.b $w0, $w0, $w1 ; P5600-NEXT: st.b $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -21,9 +21,9 @@ entry: define void @add_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) { ; P5600-LABEL: add_v8i16: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.h $w0, 0($4) -; P5600-NEXT: ld.h $w1, 0($5) -; P5600-NEXT: addv.h $w0, $w1, $w0 +; P5600-NEXT: ld.h $w1, 0($4) +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: addv.h $w0, $w0, $w1 ; P5600-NEXT: st.h $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -38,9 +38,9 @@ entry: define void @add_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) { ; P5600-LABEL: add_v4i32: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.w $w0, 0($4) -; P5600-NEXT: ld.w $w1, 0($5) -; P5600-NEXT: addv.w $w0, $w1, $w0 +; P5600-NEXT: ld.w $w1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: addv.w $w0, $w0, $w1 ; P5600-NEXT: st.w $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -55,9 +55,9 @@ entry: define void @add_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) { ; P5600-LABEL: add_v2i64: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.d $w0, 0($4) -; P5600-NEXT: ld.d $w1, 0($5) -; P5600-NEXT: addv.d $w0, $w1, $w0 +; P5600-NEXT: ld.d $w1, 0($4) +; P5600-NEXT: ld.d $w0, 0($5) +; P5600-NEXT: addv.d $w0, $w0, $w1 ; P5600-NEXT: st.d $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll index a9f49c025b953..32bc78827baf5 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll @@ -6,10 +6,10 @@ define { float, float } @add_complex_float({ float, float }* %a, { float, float ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lwc1 $f0, 0($4) ; MIPS32-NEXT: lwc1 $f1, 4($4) -; MIPS32-NEXT: lwc1 $f2, 0($5) -; MIPS32-NEXT: lwc1 $f3, 4($5) -; MIPS32-NEXT: add.s $f0, $f0, $f2 -; MIPS32-NEXT: add.s $f2, $f1, $f3 +; MIPS32-NEXT: lwc1 $f3, 0($5) +; MIPS32-NEXT: lwc1 $f2, 4($5) +; MIPS32-NEXT: add.s $f0, $f0, $f3 +; MIPS32-NEXT: add.s $f2, $f1, $f2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -33,10 +33,10 @@ define { double, double } @add_complex_double({ double, double }* %a, { double, ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ldc1 $f0, 0($4) ; MIPS32-NEXT: ldc1 $f2, 8($4) -; MIPS32-NEXT: ldc1 $f4, 0($5) -; MIPS32-NEXT: ldc1 $f6, 8($5) -; MIPS32-NEXT: add.d $f0, $f0, $f4 -; MIPS32-NEXT: add.d $f2, $f2, $f6 +; MIPS32-NEXT: ldc1 $f6, 0($5) +; MIPS32-NEXT: ldc1 $f4, 8($5) +; MIPS32-NEXT: add.d $f0, $f0, $f6 +; MIPS32-NEXT: add.d $f2, $f2, $f4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -66,9 +66,9 @@ define void @call_ret_complex_float({ float, float }* %z) { ; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal ret_complex_float ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: swc1 $f0, 0($1) -; MIPS32-NEXT: swc1 $f2, 4($1) +; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: swc1 $f0, 0($4) +; MIPS32-NEXT: swc1 $f2, 4($4) ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra @@ -95,9 +95,9 @@ define void @call_ret_complex_double({ double, double }* %z) { ; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal ret_complex_double ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f0, 0($1) -; MIPS32-NEXT: sdc1 $f2, 8($1) +; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sdc1 $f0, 0($4) +; MIPS32-NEXT: sdc1 $f2, 8($4) ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll index 662bcdf757b6d..2bbbe4ac3ae29 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll @@ -6,64 +6,64 @@ declare i32 @llvm.bitreverse.i32(i32) define i32 @bitreverse_i32(i32 signext %a) { ; MIPS32-LABEL: bitreverse_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $4, 24 -; MIPS32-NEXT: srl $2, $4, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: srl $1, $4, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $4, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: srl $2, $4, 8 ; MIPS32-NEXT: andi $2, $2, 65280 -; MIPS32-NEXT: or $1, $1, $2 -; MIPS32-NEXT: lui $2, 61680 -; MIPS32-NEXT: ori $2, $2, 61680 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 4 -; MIPS32-NEXT: sll $1, $1, 4 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: lui $2, 52428 -; MIPS32-NEXT: ori $2, $2, 52428 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 2 -; MIPS32-NEXT: sll $1, $1, 2 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: lui $2, 43690 -; MIPS32-NEXT: ori $2, $2, 43690 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 1 -; MIPS32-NEXT: sll $1, $1, 1 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $2, $3, $1 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 61680 +; MIPS32-NEXT: ori $3, $1, 61680 +; MIPS32-NEXT: and $1, $2, $3 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: sll $2, $2, 4 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 52428 +; MIPS32-NEXT: ori $3, $1, 52428 +; MIPS32-NEXT: and $1, $2, $3 +; MIPS32-NEXT: srl $1, $1, 2 +; MIPS32-NEXT: sll $2, $2, 2 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 43690 +; MIPS32-NEXT: ori $3, $1, 43690 +; MIPS32-NEXT: and $1, $2, $3 +; MIPS32-NEXT: srl $1, $1, 1 +; MIPS32-NEXT: sll $2, $2, 1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: or $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R2-LABEL: bitreverse_i32: ; MIPS32R2: # %bb.0: # %entry ; MIPS32R2-NEXT: wsbh $1, $4 -; MIPS32R2-NEXT: rotr $1, $1, 16 -; MIPS32R2-NEXT: lui $2, 61680 -; MIPS32R2-NEXT: ori $2, $2, 61680 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 4 -; MIPS32R2-NEXT: sll $1, $1, 4 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $1, $3, $1 -; MIPS32R2-NEXT: lui $2, 52428 -; MIPS32R2-NEXT: ori $2, $2, 52428 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 2 -; MIPS32R2-NEXT: sll $1, $1, 2 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $1, $3, $1 -; MIPS32R2-NEXT: lui $2, 43690 -; MIPS32R2-NEXT: ori $2, $2, 43690 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 1 -; MIPS32R2-NEXT: sll $1, $1, 1 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $2, $3, $1 +; MIPS32R2-NEXT: rotr $2, $1, 16 +; MIPS32R2-NEXT: lui $1, 61680 +; MIPS32R2-NEXT: ori $3, $1, 61680 +; MIPS32R2-NEXT: and $1, $2, $3 +; MIPS32R2-NEXT: srl $1, $1, 4 +; MIPS32R2-NEXT: sll $2, $2, 4 +; MIPS32R2-NEXT: and $2, $2, $3 +; MIPS32R2-NEXT: or $2, $1, $2 +; MIPS32R2-NEXT: lui $1, 52428 +; MIPS32R2-NEXT: ori $3, $1, 52428 +; MIPS32R2-NEXT: and $1, $2, $3 +; MIPS32R2-NEXT: srl $1, $1, 2 +; MIPS32R2-NEXT: sll $2, $2, 2 +; MIPS32R2-NEXT: and $2, $2, $3 +; MIPS32R2-NEXT: or $2, $1, $2 +; MIPS32R2-NEXT: lui $1, 43690 +; MIPS32R2-NEXT: ori $3, $1, 43690 +; MIPS32R2-NEXT: and $1, $2, $3 +; MIPS32R2-NEXT: srl $1, $1, 1 +; MIPS32R2-NEXT: sll $2, $2, 1 +; MIPS32R2-NEXT: and $2, $2, $3 +; MIPS32R2-NEXT: or $2, $1, $2 ; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: nop entry: @@ -75,107 +75,107 @@ declare i64 @llvm.bitreverse.i64(i64) define i64 @bitreverse_i64(i64 signext %a) { ; MIPS32-LABEL: bitreverse_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $5, 24 -; MIPS32-NEXT: srl $2, $5, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: sll $2, $5, 24 +; MIPS32-NEXT: srl $1, $5, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $5, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: srl $2, $5, 8 ; MIPS32-NEXT: andi $2, $2, 65280 -; MIPS32-NEXT: or $1, $1, $2 -; MIPS32-NEXT: lui $2, 61680 -; MIPS32-NEXT: ori $2, $2, 61680 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 4 -; MIPS32-NEXT: sll $1, $1, 4 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: lui $3, 52428 -; MIPS32-NEXT: ori $3, $3, 52428 -; MIPS32-NEXT: and $5, $1, $3 -; MIPS32-NEXT: srl $5, $5, 2 -; MIPS32-NEXT: sll $1, $1, 2 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: or $1, $5, $1 -; MIPS32-NEXT: lui $5, 43690 -; MIPS32-NEXT: ori $5, $5, 43690 -; MIPS32-NEXT: and $6, $1, $5 -; MIPS32-NEXT: srl $6, $6, 1 -; MIPS32-NEXT: sll $1, $1, 1 -; MIPS32-NEXT: and $1, $1, $5 -; MIPS32-NEXT: or $1, $6, $1 -; MIPS32-NEXT: sll $6, $4, 24 -; MIPS32-NEXT: srl $7, $4, 24 -; MIPS32-NEXT: or $6, $7, $6 -; MIPS32-NEXT: andi $7, $4, 65280 -; MIPS32-NEXT: sll $7, $7, 8 -; MIPS32-NEXT: or $6, $6, $7 -; MIPS32-NEXT: srl $4, $4, 8 -; MIPS32-NEXT: andi $4, $4, 65280 -; MIPS32-NEXT: or $4, $6, $4 -; MIPS32-NEXT: and $6, $4, $2 -; MIPS32-NEXT: srl $6, $6, 4 -; MIPS32-NEXT: sll $4, $4, 4 -; MIPS32-NEXT: and $2, $4, $2 -; MIPS32-NEXT: or $2, $6, $2 -; MIPS32-NEXT: and $4, $2, $3 -; MIPS32-NEXT: srl $4, $4, 2 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 61680 +; MIPS32-NEXT: ori $6, $1, 61680 +; MIPS32-NEXT: and $1, $2, $6 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: sll $2, $2, 4 +; MIPS32-NEXT: and $2, $2, $6 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 52428 +; MIPS32-NEXT: ori $5, $1, 52428 +; MIPS32-NEXT: and $1, $2, $5 +; MIPS32-NEXT: srl $1, $1, 2 ; MIPS32-NEXT: sll $2, $2, 2 -; MIPS32-NEXT: and $2, $2, $3 -; MIPS32-NEXT: or $2, $4, $2 -; MIPS32-NEXT: and $3, $2, $5 -; MIPS32-NEXT: srl $3, $3, 1 -; MIPS32-NEXT: sll $2, $2, 1 ; MIPS32-NEXT: and $2, $2, $5 -; MIPS32-NEXT: or $3, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 43690 +; MIPS32-NEXT: ori $4, $1, 43690 +; MIPS32-NEXT: and $1, $2, $4 +; MIPS32-NEXT: srl $1, $1, 1 +; MIPS32-NEXT: sll $2, $2, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: sll $7, $3, 24 +; MIPS32-NEXT: srl $1, $3, 24 +; MIPS32-NEXT: or $1, $1, $7 +; MIPS32-NEXT: andi $7, $3, 65280 +; MIPS32-NEXT: sll $7, $7, 8 +; MIPS32-NEXT: or $1, $1, $7 +; MIPS32-NEXT: srl $3, $3, 8 +; MIPS32-NEXT: andi $3, $3, 65280 +; MIPS32-NEXT: or $3, $1, $3 +; MIPS32-NEXT: and $1, $3, $6 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: sll $3, $3, 4 +; MIPS32-NEXT: and $3, $3, $6 +; MIPS32-NEXT: or $3, $1, $3 +; MIPS32-NEXT: and $1, $3, $5 +; MIPS32-NEXT: srl $1, $1, 2 +; MIPS32-NEXT: sll $3, $3, 2 +; MIPS32-NEXT: and $3, $3, $5 +; MIPS32-NEXT: or $3, $1, $3 +; MIPS32-NEXT: and $1, $3, $4 +; MIPS32-NEXT: srl $1, $1, 1 +; MIPS32-NEXT: sll $3, $3, 1 +; MIPS32-NEXT: and $3, $3, $4 +; MIPS32-NEXT: or $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R2-LABEL: bitreverse_i64: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: wsbh $1, $5 -; MIPS32R2-NEXT: rotr $1, $1, 16 +; MIPS32R2-NEXT: move $1, $4 +; MIPS32R2-NEXT: wsbh $2, $5 +; MIPS32R2-NEXT: rotr $3, $2, 16 ; MIPS32R2-NEXT: lui $2, 61680 -; MIPS32R2-NEXT: ori $2, $2, 61680 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 4 -; MIPS32R2-NEXT: sll $1, $1, 4 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $1, $3, $1 -; MIPS32R2-NEXT: lui $3, 52428 -; MIPS32R2-NEXT: ori $3, $3, 52428 -; MIPS32R2-NEXT: and $5, $1, $3 -; MIPS32R2-NEXT: srl $5, $5, 2 -; MIPS32R2-NEXT: sll $1, $1, 2 -; MIPS32R2-NEXT: and $1, $1, $3 -; MIPS32R2-NEXT: or $1, $5, $1 -; MIPS32R2-NEXT: lui $5, 43690 -; MIPS32R2-NEXT: ori $5, $5, 43690 -; MIPS32R2-NEXT: and $6, $1, $5 -; MIPS32R2-NEXT: srl $6, $6, 1 -; MIPS32R2-NEXT: sll $1, $1, 1 -; MIPS32R2-NEXT: and $1, $1, $5 -; MIPS32R2-NEXT: or $1, $6, $1 -; MIPS32R2-NEXT: wsbh $4, $4 -; MIPS32R2-NEXT: rotr $4, $4, 16 -; MIPS32R2-NEXT: and $6, $4, $2 -; MIPS32R2-NEXT: srl $6, $6, 4 -; MIPS32R2-NEXT: sll $4, $4, 4 -; MIPS32R2-NEXT: and $2, $4, $2 -; MIPS32R2-NEXT: or $2, $6, $2 -; MIPS32R2-NEXT: and $4, $2, $3 -; MIPS32R2-NEXT: srl $4, $4, 2 -; MIPS32R2-NEXT: sll $2, $2, 2 -; MIPS32R2-NEXT: and $2, $2, $3 -; MIPS32R2-NEXT: or $2, $4, $2 -; MIPS32R2-NEXT: and $3, $2, $5 -; MIPS32R2-NEXT: srl $3, $3, 1 -; MIPS32R2-NEXT: sll $2, $2, 1 -; MIPS32R2-NEXT: and $2, $2, $5 -; MIPS32R2-NEXT: or $3, $3, $2 -; MIPS32R2-NEXT: move $2, $1 +; MIPS32R2-NEXT: ori $6, $2, 61680 +; MIPS32R2-NEXT: and $2, $3, $6 +; MIPS32R2-NEXT: srl $2, $2, 4 +; MIPS32R2-NEXT: sll $3, $3, 4 +; MIPS32R2-NEXT: and $3, $3, $6 +; MIPS32R2-NEXT: or $3, $2, $3 +; MIPS32R2-NEXT: lui $2, 52428 +; MIPS32R2-NEXT: ori $5, $2, 52428 +; MIPS32R2-NEXT: and $2, $3, $5 +; MIPS32R2-NEXT: srl $2, $2, 2 +; MIPS32R2-NEXT: sll $3, $3, 2 +; MIPS32R2-NEXT: and $3, $3, $5 +; MIPS32R2-NEXT: or $3, $2, $3 +; MIPS32R2-NEXT: lui $2, 43690 +; MIPS32R2-NEXT: ori $4, $2, 43690 +; MIPS32R2-NEXT: and $2, $3, $4 +; MIPS32R2-NEXT: srl $2, $2, 1 +; MIPS32R2-NEXT: sll $3, $3, 1 +; MIPS32R2-NEXT: and $3, $3, $4 +; MIPS32R2-NEXT: or $2, $2, $3 +; MIPS32R2-NEXT: wsbh $1, $1 +; MIPS32R2-NEXT: rotr $3, $1, 16 +; MIPS32R2-NEXT: and $1, $3, $6 +; MIPS32R2-NEXT: srl $1, $1, 4 +; MIPS32R2-NEXT: sll $3, $3, 4 +; MIPS32R2-NEXT: and $3, $3, $6 +; MIPS32R2-NEXT: or $3, $1, $3 +; MIPS32R2-NEXT: and $1, $3, $5 +; MIPS32R2-NEXT: srl $1, $1, 2 +; MIPS32R2-NEXT: sll $3, $3, 2 +; MIPS32R2-NEXT: and $3, $3, $5 +; MIPS32R2-NEXT: or $3, $1, $3 +; MIPS32R2-NEXT: and $1, $3, $4 +; MIPS32R2-NEXT: srl $1, $1, 1 +; MIPS32R2-NEXT: sll $3, $3, 1 +; MIPS32R2-NEXT: and $3, $3, $4 +; MIPS32R2-NEXT: or $3, $1, $3 ; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll index 4022efcafb644..803b76cbc51ab 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll @@ -320,10 +320,10 @@ define i8 @ashr_i8(i8 %a) { ; MIPS32-LABEL: ashr_i8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 2 -; MIPS32-NEXT: andi $1, $1, 255 -; MIPS32-NEXT: sll $2, $4, 24 -; MIPS32-NEXT: sra $2, $2, 24 -; MIPS32-NEXT: srav $2, $2, $1 +; MIPS32-NEXT: andi $2, $1, 255 +; MIPS32-NEXT: sll $1, $4, 24 +; MIPS32-NEXT: sra $1, $1, 24 +; MIPS32-NEXT: srav $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -335,9 +335,9 @@ define i16 @lshr_i16(i16 %a) { ; MIPS32-LABEL: lshr_i16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 2 -; MIPS32-NEXT: andi $1, $1, 65535 -; MIPS32-NEXT: andi $2, $4, 65535 -; MIPS32-NEXT: srlv $2, $2, $1 +; MIPS32-NEXT: andi $2, $1, 65535 +; MIPS32-NEXT: andi $1, $4, 65535 +; MIPS32-NEXT: srlv $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -348,29 +348,25 @@ entry: define i64 @shl_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: shl_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $sp, $sp, -8 -; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: move $9, $6 ; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $2, $6, $1 -; MIPS32-NEXT: subu $3, $1, $6 -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: sltu $1, $6, $1 -; MIPS32-NEXT: sltiu $9, $6, 1 -; MIPS32-NEXT: sllv $10, $4, $6 -; MIPS32-NEXT: srlv $3, $4, $3 -; MIPS32-NEXT: sllv $6, $5, $6 -; MIPS32-NEXT: or $3, $3, $6 -; MIPS32-NEXT: sllv $2, $4, $2 -; MIPS32-NEXT: andi $4, $1, 1 -; MIPS32-NEXT: movn $8, $10, $4 +; MIPS32-NEXT: subu $8, $9, $1 +; MIPS32-NEXT: subu $4, $1, $9 +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: sltu $6, $9, $1 +; MIPS32-NEXT: sltiu $1, $9, 1 +; MIPS32-NEXT: sllv $7, $3, $9 +; MIPS32-NEXT: srlv $4, $3, $4 +; MIPS32-NEXT: sllv $9, $5, $9 +; MIPS32-NEXT: or $4, $4, $9 +; MIPS32-NEXT: sllv $3, $3, $8 +; MIPS32-NEXT: andi $8, $6, 1 +; MIPS32-NEXT: movn $2, $7, $8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: movn $3, $4, $6 ; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $2, $3, $1 -; MIPS32-NEXT: andi $1, $9, 1 -; MIPS32-NEXT: movn $2, $5, $1 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: move $2, $8 -; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: addiu $sp, $sp, 8 +; MIPS32-NEXT: movn $3, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -381,24 +377,30 @@ entry: define i64 @ashl_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ashl_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $2, $6, $1 -; MIPS32-NEXT: subu $3, $1, $6 -; MIPS32-NEXT: sltu $1, $6, $1 -; MIPS32-NEXT: sltiu $8, $6, 1 -; MIPS32-NEXT: srav $9, $5, $6 -; MIPS32-NEXT: srlv $6, $4, $6 -; MIPS32-NEXT: sllv $3, $5, $3 -; MIPS32-NEXT: or $3, $6, $3 -; MIPS32-NEXT: sra $6, $5, 31 -; MIPS32-NEXT: srav $2, $5, $2 -; MIPS32-NEXT: andi $5, $1, 1 -; MIPS32-NEXT: movn $2, $3, $5 -; MIPS32-NEXT: andi $3, $8, 1 -; MIPS32-NEXT: movn $2, $4, $3 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $6, $9, $1 +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $2, $5 +; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $3, $6 +; MIPS32-NEXT: ori $1, $zero, 32 +; MIPS32-NEXT: subu $8, $3, $1 +; MIPS32-NEXT: subu $7, $1, $3 +; MIPS32-NEXT: sltu $4, $3, $1 +; MIPS32-NEXT: sltiu $6, $3, 1 +; MIPS32-NEXT: srav $1, $2, $3 +; MIPS32-NEXT: srlv $3, $5, $3 +; MIPS32-NEXT: sllv $7, $2, $7 +; MIPS32-NEXT: or $7, $3, $7 +; MIPS32-NEXT: sra $3, $2, 31 +; MIPS32-NEXT: srav $2, $2, $8 +; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: movn $2, $7, $8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: movn $2, $5, $6 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $3, $1, $4 +; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -409,24 +411,30 @@ entry: define i64 @lshr_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: lshr_i64: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $2, $5 +; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: move $7, $6 ; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $2, $6, $1 -; MIPS32-NEXT: subu $3, $1, $6 -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: sltu $1, $6, $1 -; MIPS32-NEXT: sltiu $9, $6, 1 -; MIPS32-NEXT: srlv $10, $5, $6 -; MIPS32-NEXT: srlv $6, $4, $6 -; MIPS32-NEXT: sllv $3, $5, $3 -; MIPS32-NEXT: or $3, $6, $3 -; MIPS32-NEXT: srlv $2, $5, $2 -; MIPS32-NEXT: andi $5, $1, 1 -; MIPS32-NEXT: movn $2, $3, $5 -; MIPS32-NEXT: andi $3, $9, 1 -; MIPS32-NEXT: movn $2, $4, $3 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $8, $10, $1 -; MIPS32-NEXT: move $3, $8 +; MIPS32-NEXT: subu $8, $7, $1 +; MIPS32-NEXT: subu $9, $1, $7 +; MIPS32-NEXT: ori $3, $zero, 0 +; MIPS32-NEXT: sltu $4, $7, $1 +; MIPS32-NEXT: sltiu $6, $7, 1 +; MIPS32-NEXT: srlv $1, $2, $7 +; MIPS32-NEXT: srlv $7, $5, $7 +; MIPS32-NEXT: sllv $9, $2, $9 +; MIPS32-NEXT: or $7, $7, $9 +; MIPS32-NEXT: srlv $2, $2, $8 +; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: movn $2, $7, $8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: movn $2, $5, $6 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $3, $1, $4 +; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll index 4600142cb9be4..6ce601a94ef77 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll @@ -30,21 +30,21 @@ define i32 @Conditional_branch(i1 %cond, i32 %a, i32 %b) { ; MIPS32: # %bb.0: ; MIPS32-NEXT: addiu $sp, $sp, -8 ; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $5, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: ; MIPS32-NEXT: j $BB1_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %if.then -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_3: # %if.else -; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll index 568558538cdb1..9bb803f4cfd3c 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll @@ -6,19 +6,19 @@ define i32 @indirectbr(i8 *%addr) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -8 ; MIPS32-NEXT: .cfi_def_cfa_offset 8 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: ori $1, $zero, 0 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 1 ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jr $4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_1: # %L1 -; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %L2 -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll index 3b371cca6fe90..b168e13b7f55b 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll @@ -6,9 +6,9 @@ declare i32 @llvm.bswap.i32(i32) define i32 @bswap_i32(i32 %x) { ; MIPS32-LABEL: bswap_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $4, 24 -; MIPS32-NEXT: srl $2, $4, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: srl $1, $4, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $4, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 @@ -33,18 +33,18 @@ declare i64 @llvm.bswap.i64(i64) define i64 @bswap_i64(i64 %x) { ; MIPS32-LABEL: bswap_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $5, 24 -; MIPS32-NEXT: srl $2, $5, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sll $2, $5, 24 +; MIPS32-NEXT: srl $1, $5, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $5, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: srl $2, $5, 8 ; MIPS32-NEXT: andi $2, $2, 65280 ; MIPS32-NEXT: or $2, $1, $2 -; MIPS32-NEXT: sll $1, $4, 24 -; MIPS32-NEXT: srl $3, $4, 24 -; MIPS32-NEXT: or $1, $3, $1 +; MIPS32-NEXT: sll $3, $4, 24 +; MIPS32-NEXT: srl $1, $4, 24 +; MIPS32-NEXT: or $1, $1, $3 ; MIPS32-NEXT: andi $3, $4, 65280 ; MIPS32-NEXT: sll $3, $3, 8 ; MIPS32-NEXT: or $1, $1, $3 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll index f7952e4462361..0312f49fa6ee7 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll @@ -29,11 +29,10 @@ define i32 @call_global(i32 %a0, i32 %a1, i32 %x, i32 %y) { ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $25, %call16(f)($1) +; MIPS32_PIC-NEXT: addu $gp, $2, $25 ; MIPS32_PIC-NEXT: move $4, $6 ; MIPS32_PIC-NEXT: move $5, $7 -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: lw $25, %call16(f)($gp) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: addu $2, $2, $2 @@ -89,12 +88,11 @@ define i32 @call_global_with_local_linkage(i32 %a0, i32 %a1, i32 %x, i32 %y) { ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $2, %got(f_with_local_linkage)($1) -; MIPS32_PIC-NEXT: addiu $25, $2, %lo(f_with_local_linkage) +; MIPS32_PIC-NEXT: addu $gp, $2, $25 ; MIPS32_PIC-NEXT: move $4, $6 ; MIPS32_PIC-NEXT: move $5, $7 -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: lw $1, %got(f_with_local_linkage)($gp) +; MIPS32_PIC-NEXT: addiu $25, $1, %lo(f_with_local_linkage) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: addu $2, $2, $2 @@ -115,10 +113,9 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) { ; MIPS32-NEXT: .cfi_def_cfa_offset 24 ; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $25, $4 ; MIPS32-NEXT: move $4, $5 ; MIPS32-NEXT: move $5, $6 -; MIPS32-NEXT: lw $25, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jalr $25 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload @@ -132,10 +129,9 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) { ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: sw $4, 16($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: move $25, $4 ; MIPS32_PIC-NEXT: move $4, $5 ; MIPS32_PIC-NEXT: move $5, $6 -; MIPS32_PIC-NEXT: lw $25, 16($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll index 4030cfbf57e68..65fce9d4f5d59 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll @@ -17,14 +17,14 @@ declare i32 @llvm.ctlz.i32(i32, i1 immarg) define i64 @ctlz_i64(i64 %a) { ; MIPS32-LABEL: ctlz_i64: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $1, $4 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltiu $1, $5, 1 -; MIPS32-NEXT: clz $2, $4 -; MIPS32-NEXT: addiu $2, $2, 32 -; MIPS32-NEXT: clz $4, $5 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $4, $2, $1 -; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: sltiu $4, $5, 1 +; MIPS32-NEXT: clz $1, $1 +; MIPS32-NEXT: addiu $1, $1, 32 +; MIPS32-NEXT: clz $2, $5 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $2, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll index 5d7a2f23eac19..7ac9c4332feda 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll @@ -8,15 +8,15 @@ define i32 @ctpop_i32(i32 %a) { ; MIPS32-NEXT: lui $2, 21845 ; MIPS32-NEXT: ori $2, $2, 21845 ; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: subu $1, $4, $1 -; MIPS32-NEXT: srl $2, $1, 2 +; MIPS32-NEXT: subu $2, $4, $1 +; MIPS32-NEXT: srl $1, $2, 2 ; MIPS32-NEXT: lui $3, 13107 ; MIPS32-NEXT: ori $3, $3, 13107 -; MIPS32-NEXT: and $2, $2, $3 ; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: addu $1, $2, $1 -; MIPS32-NEXT: srl $2, $1, 4 -; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: addu $2, $1, $2 +; MIPS32-NEXT: srl $1, $2, 4 +; MIPS32-NEXT: addu $1, $1, $2 ; MIPS32-NEXT: lui $2, 3855 ; MIPS32-NEXT: ori $2, $2, 3855 ; MIPS32-NEXT: and $1, $1, $2 @@ -38,37 +38,37 @@ define i64 @ctpop_i64(i64 %a) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: srl $1, $4, 1 ; MIPS32-NEXT: lui $2, 21845 -; MIPS32-NEXT: ori $2, $2, 21845 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: subu $1, $4, $1 -; MIPS32-NEXT: srl $3, $1, 2 -; MIPS32-NEXT: lui $4, 13107 -; MIPS32-NEXT: ori $4, $4, 13107 -; MIPS32-NEXT: and $3, $3, $4 +; MIPS32-NEXT: ori $7, $2, 21845 +; MIPS32-NEXT: and $1, $1, $7 +; MIPS32-NEXT: subu $2, $4, $1 +; MIPS32-NEXT: srl $1, $2, 2 +; MIPS32-NEXT: lui $3, 13107 +; MIPS32-NEXT: ori $6, $3, 13107 +; MIPS32-NEXT: and $1, $1, $6 +; MIPS32-NEXT: and $2, $2, $6 +; MIPS32-NEXT: addu $2, $1, $2 +; MIPS32-NEXT: srl $1, $2, 4 +; MIPS32-NEXT: addu $1, $1, $2 +; MIPS32-NEXT: lui $2, 3855 +; MIPS32-NEXT: ori $4, $2, 3855 ; MIPS32-NEXT: and $1, $1, $4 -; MIPS32-NEXT: addu $1, $3, $1 -; MIPS32-NEXT: srl $3, $1, 4 -; MIPS32-NEXT: addu $1, $3, $1 -; MIPS32-NEXT: lui $3, 3855 -; MIPS32-NEXT: ori $3, $3, 3855 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: lui $6, 257 -; MIPS32-NEXT: ori $6, $6, 257 -; MIPS32-NEXT: mul $1, $1, $6 +; MIPS32-NEXT: lui $2, 257 +; MIPS32-NEXT: ori $3, $2, 257 +; MIPS32-NEXT: mul $1, $1, $3 +; MIPS32-NEXT: srl $2, $1, 24 +; MIPS32-NEXT: srl $1, $5, 1 +; MIPS32-NEXT: and $1, $1, $7 +; MIPS32-NEXT: subu $5, $5, $1 +; MIPS32-NEXT: srl $1, $5, 2 +; MIPS32-NEXT: and $1, $1, $6 +; MIPS32-NEXT: and $5, $5, $6 +; MIPS32-NEXT: addu $5, $1, $5 +; MIPS32-NEXT: srl $1, $5, 4 +; MIPS32-NEXT: addu $1, $1, $5 +; MIPS32-NEXT: and $1, $1, $4 +; MIPS32-NEXT: mul $1, $1, $3 ; MIPS32-NEXT: srl $1, $1, 24 -; MIPS32-NEXT: srl $7, $5, 1 -; MIPS32-NEXT: and $2, $7, $2 -; MIPS32-NEXT: subu $2, $5, $2 -; MIPS32-NEXT: srl $5, $2, 2 -; MIPS32-NEXT: and $5, $5, $4 -; MIPS32-NEXT: and $2, $2, $4 -; MIPS32-NEXT: addu $2, $5, $2 -; MIPS32-NEXT: srl $4, $2, 4 -; MIPS32-NEXT: addu $2, $4, $2 -; MIPS32-NEXT: and $2, $2, $3 -; MIPS32-NEXT: mul $2, $2, $6 -; MIPS32-NEXT: srl $2, $2, 24 -; MIPS32-NEXT: addu $2, $2, $1 +; MIPS32-NEXT: addu $2, $1, $2 ; MIPS32-NEXT: ori $3, $zero, 0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll index 3ea5329da548e..44a2e619f7156 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll @@ -6,10 +6,10 @@ define i32 @cttz_i32(i32 %a) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: not $1, $4 ; MIPS32-NEXT: addiu $2, $4, -1 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: ori $2, $zero, 32 -; MIPS32-NEXT: clz $1, $1 -; MIPS32-NEXT: subu $2, $2, $1 +; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: ori $1, $zero, 32 +; MIPS32-NEXT: clz $2, $2 +; MIPS32-NEXT: subu $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -21,23 +21,23 @@ declare i32 @llvm.cttz.i32(i32, i1 immarg) define i64 @cttz_i64(i64 %a) { ; MIPS32-LABEL: cttz_i64: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $6, $4 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltiu $1, $4, 1 -; MIPS32-NEXT: not $2, $5 -; MIPS32-NEXT: addiu $5, $5, -1 -; MIPS32-NEXT: and $2, $2, $5 -; MIPS32-NEXT: ori $5, $zero, 32 -; MIPS32-NEXT: clz $2, $2 -; MIPS32-NEXT: subu $2, $5, $2 -; MIPS32-NEXT: addiu $2, $2, 32 -; MIPS32-NEXT: not $6, $4 -; MIPS32-NEXT: addiu $4, $4, -1 -; MIPS32-NEXT: and $4, $6, $4 -; MIPS32-NEXT: clz $4, $4 -; MIPS32-NEXT: subu $4, $5, $4 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $4, $2, $1 -; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: sltiu $4, $6, 1 +; MIPS32-NEXT: not $1, $5 +; MIPS32-NEXT: addiu $2, $5, -1 +; MIPS32-NEXT: and $1, $1, $2 +; MIPS32-NEXT: ori $2, $zero, 32 +; MIPS32-NEXT: clz $1, $1 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: addiu $1, $1, 32 +; MIPS32-NEXT: not $5, $6 +; MIPS32-NEXT: addiu $6, $6, -1 +; MIPS32-NEXT: and $5, $5, $6 +; MIPS32-NEXT: clz $5, $5 +; MIPS32-NEXT: subu $2, $2, $5 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $2, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -53,10 +53,10 @@ define i32 @ffs_i32_expansion(i32 %a) { ; MIPS32-NEXT: ori $1, $zero, 0 ; MIPS32-NEXT: not $2, $4 ; MIPS32-NEXT: addiu $3, $4, -1 -; MIPS32-NEXT: and $2, $2, $3 -; MIPS32-NEXT: ori $3, $zero, 32 -; MIPS32-NEXT: clz $2, $2 -; MIPS32-NEXT: subu $2, $3, $2 +; MIPS32-NEXT: and $3, $2, $3 +; MIPS32-NEXT: ori $2, $zero, 32 +; MIPS32-NEXT: clz $3, $3 +; MIPS32-NEXT: subu $2, $2, $3 ; MIPS32-NEXT: addiu $2, $2, 1 ; MIPS32-NEXT: sltiu $3, $4, 1 ; MIPS32-NEXT: andi $3, $3, 1 @@ -74,37 +74,35 @@ entry: define i64 @ffs_i64_expansion(i64 %a) { ; MIPS32-LABEL: ffs_i64_expansion: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: sltiu $3, $4, 1 -; MIPS32-NEXT: not $6, $5 -; MIPS32-NEXT: addiu $7, $5, -1 -; MIPS32-NEXT: and $6, $6, $7 -; MIPS32-NEXT: ori $7, $zero, 32 +; MIPS32-NEXT: ori $3, $zero, 1 +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sltiu $7, $4, 1 +; MIPS32-NEXT: not $2, $5 +; MIPS32-NEXT: addiu $6, $5, -1 +; MIPS32-NEXT: and $6, $2, $6 +; MIPS32-NEXT: ori $2, $zero, 32 ; MIPS32-NEXT: clz $6, $6 -; MIPS32-NEXT: subu $6, $7, $6 +; MIPS32-NEXT: subu $6, $2, $6 ; MIPS32-NEXT: addiu $6, $6, 32 ; MIPS32-NEXT: not $8, $4 ; MIPS32-NEXT: addiu $9, $4, -1 ; MIPS32-NEXT: and $8, $8, $9 ; MIPS32-NEXT: clz $8, $8 -; MIPS32-NEXT: subu $7, $7, $8 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $7, $6, $3 -; MIPS32-NEXT: addiu $3, $7, 1 -; MIPS32-NEXT: sltu $1, $3, $1 -; MIPS32-NEXT: addiu $6, $2, 0 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: addu $1, $6, $1 +; MIPS32-NEXT: subu $2, $2, $8 +; MIPS32-NEXT: andi $7, $7, 1 +; MIPS32-NEXT: movn $2, $6, $7 +; MIPS32-NEXT: addiu $2, $2, 1 +; MIPS32-NEXT: sltu $6, $2, $3 +; MIPS32-NEXT: addiu $3, $1, 0 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: addu $3, $3, $6 ; MIPS32-NEXT: xori $4, $4, 0 ; MIPS32-NEXT: xori $5, $5, 0 ; MIPS32-NEXT: or $4, $4, $5 ; MIPS32-NEXT: sltiu $4, $4, 1 ; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: movn $3, $2, $4 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: move $2, $3 -; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $2, $1, $4 +; MIPS32-NEXT: movn $3, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll index fcc2d6ef0a930..294bc71443ea5 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll @@ -15,35 +15,32 @@ define void @Print_c_N_times(i8 %c, i32 %N) { ; MIPS32-NEXT: .cfi_offset 30, -8 ; MIPS32-NEXT: move $fp, $sp ; MIPS32-NEXT: .cfi_def_cfa_register 30 -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: addiu $3, $5, 1 -; MIPS32-NEXT: mul $1, $3, $1 +; MIPS32-NEXT: sw $4, 8($fp) # 4-byte Folded Spill +; MIPS32-NEXT: move $6, $5 +; MIPS32-NEXT: lw $5, 8($fp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $6, 12($fp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 1 +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sw $1, 16($fp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $6, 1 +; MIPS32-NEXT: mul $1, $1, $2 ; MIPS32-NEXT: addiu $1, $1, 7 -; MIPS32-NEXT: addiu $3, $zero, 65528 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: move $3, $sp -; MIPS32-NEXT: subu $1, $3, $1 -; MIPS32-NEXT: move $sp, $1 -; MIPS32-NEXT: addiu $sp, $sp, -16 +; MIPS32-NEXT: addiu $2, $zero, 65528 +; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: move $1, $sp +; MIPS32-NEXT: subu $4, $1, $2 ; MIPS32-NEXT: sw $4, 20($fp) # 4-byte Folded Spill -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: lw $3, 20($fp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $5, 16($fp) # 4-byte Folded Spill -; MIPS32-NEXT: move $5, $3 -; MIPS32-NEXT: lw $6, 16($fp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $2, 12($fp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $1, 8($fp) # 4-byte Folded Spill +; MIPS32-NEXT: move $sp, $4 +; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: jal memset ; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $5, 12($fp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 16($fp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 20($fp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 -; MIPS32-NEXT: lw $1, 8($fp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 16($fp) # 4-byte Folded Reload -; MIPS32-NEXT: addu $3, $1, $2 -; MIPS32-NEXT: lw $4, 12($fp) # 4-byte Folded Reload -; MIPS32-NEXT: sb $4, 0($3) +; MIPS32-NEXT: addu $2, $4, $5 +; MIPS32-NEXT: sb $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, -16 -; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: jal puts ; MIPS32-NEXT: nop ; MIPS32-NEXT: addiu $sp, $sp, 16 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll index 58d5c8a160a6b..bfff4e72d0ab3 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll @@ -27,10 +27,9 @@ entry: define i1 @uno_s(float %x, float %y) { ; MIPS32-LABEL: uno_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -40,10 +39,9 @@ entry: define i1 @ord_s(float %x, float %y) { ; MIPS32-LABEL: ord_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -55,10 +53,9 @@ entry: define i1 @oeq_s(float %x, float %y) { ; MIPS32-LABEL: oeq_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -68,10 +65,9 @@ entry: define i1 @une_s(float %x, float %y) { ; MIPS32-LABEL: une_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -83,10 +79,9 @@ entry: define i1 @ueq_s(float %x, float %y) { ; MIPS32-LABEL: ueq_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -96,10 +91,9 @@ entry: define i1 @one_s(float %x, float %y) { ; MIPS32-LABEL: one_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -111,10 +105,9 @@ entry: define i1 @olt_s(float %x, float %y) { ; MIPS32-LABEL: olt_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -124,10 +117,9 @@ entry: define i1 @uge_s(float %x, float %y) { ; MIPS32-LABEL: uge_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -139,10 +131,9 @@ entry: define i1 @ult_s(float %x, float %y) { ; MIPS32-LABEL: ult_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -152,10 +143,9 @@ entry: define i1 @oge_s(float %x, float %y) { ; MIPS32-LABEL: oge_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -167,10 +157,9 @@ entry: define i1 @ole_s(float %x, float %y) { ; MIPS32-LABEL: ole_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -180,10 +169,9 @@ entry: define i1 @ugt_s(float %x, float %y) { ; MIPS32-LABEL: ugt_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -195,10 +183,9 @@ entry: define i1 @ule_s(float %x, float %y) { ; MIPS32-LABEL: ule_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -208,10 +195,9 @@ entry: define i1 @ogt_s(float %x, float %y) { ; MIPS32-LABEL: ogt_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -245,10 +231,9 @@ entry: define i1 @uno_d(double %x, double %y) { ; MIPS32-LABEL: uno_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -258,10 +243,9 @@ entry: define i1 @ord_d(double %x, double %y) { ; MIPS32-LABEL: ord_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -273,10 +257,9 @@ entry: define i1 @oeq_d(double %x, double %y) { ; MIPS32-LABEL: oeq_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -286,10 +269,9 @@ entry: define i1 @une_d(double %x, double %y) { ; MIPS32-LABEL: une_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -301,10 +283,9 @@ entry: define i1 @ueq_d(double %x, double %y) { ; MIPS32-LABEL: ueq_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -314,10 +295,9 @@ entry: define i1 @one_d(double %x, double %y) { ; MIPS32-LABEL: one_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -329,10 +309,9 @@ entry: define i1 @olt_d(double %x, double %y) { ; MIPS32-LABEL: olt_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -342,10 +321,9 @@ entry: define i1 @uge_d(double %x, double %y) { ; MIPS32-LABEL: uge_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -357,10 +335,9 @@ entry: define i1 @ult_d(double %x, double %y) { ; MIPS32-LABEL: ult_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -370,10 +347,9 @@ entry: define i1 @oge_d(double %x, double %y) { ; MIPS32-LABEL: oge_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -385,10 +361,9 @@ entry: define i1 @ole_d(double %x, double %y) { ; MIPS32-LABEL: ole_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -398,10 +373,9 @@ entry: define i1 @ugt_d(double %x, double %y) { ; MIPS32-LABEL: ugt_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -413,10 +387,9 @@ entry: define i1 @ule_d(double %x, double %y) { ; MIPS32-LABEL: ule_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -426,10 +399,9 @@ entry: define i1 @ogt_d(double %x, double %y) { ; MIPS32-LABEL: ogt_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll index f4ca9e5b53711..85feeda82e254 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll @@ -18,22 +18,22 @@ define double @e_double_precision() { ; FP32-LABEL: e_double_precision: ; FP32: # %bb.0: # %entry ; FP32-NEXT: lui $1, 16389 -; FP32-NEXT: ori $1, $1, 48906 -; FP32-NEXT: lui $2, 35604 -; FP32-NEXT: ori $2, $2, 22377 -; FP32-NEXT: mtc1 $2, $f0 -; FP32-NEXT: mtc1 $1, $f1 +; FP32-NEXT: ori $2, $1, 48906 +; FP32-NEXT: lui $1, 35604 +; FP32-NEXT: ori $1, $1, 22377 +; FP32-NEXT: mtc1 $1, $f0 +; FP32-NEXT: mtc1 $2, $f1 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop ; ; FP64-LABEL: e_double_precision: ; FP64: # %bb.0: # %entry ; FP64-NEXT: lui $1, 16389 -; FP64-NEXT: ori $1, $1, 48906 -; FP64-NEXT: lui $2, 35604 -; FP64-NEXT: ori $2, $2, 22377 -; FP64-NEXT: mtc1 $2, $f0 -; FP64-NEXT: mthc1 $1, $f0 +; FP64-NEXT: ori $2, $1, 48906 +; FP64-NEXT: lui $1, 35604 +; FP64-NEXT: ori $1, $1, 22377 +; FP64-NEXT: mtc1 $1, $f0 +; FP64-NEXT: mthc1 $2, $f0 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll index a98c6eb9fd6cb..e9cc0b933f719 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -164,20 +164,20 @@ define zeroext i16 @f32tou16(float %a) { ; MIPS32-LABEL: f32tou16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: trunc.w.s $f0, $f12 -; MIPS32-NEXT: mfc1 $1, $f0 -; MIPS32-NEXT: lui $2, 20224 -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: mfc1 $2, $f0 +; MIPS32-NEXT: lui $1, 20224 +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: sub.s $f1, $f12, $f0 ; MIPS32-NEXT: trunc.w.s $f1, $f1 -; MIPS32-NEXT: mfc1 $2, $f1 +; MIPS32-NEXT: mfc1 $1, $f1 ; MIPS32-NEXT: lui $3, 32768 -; MIPS32-NEXT: xor $2, $2, $3 +; MIPS32-NEXT: xor $1, $1, $3 ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 ; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 -; MIPS32-NEXT: andi $2, $2, 65535 +; MIPS32-NEXT: movn $1, $2, $3 +; MIPS32-NEXT: andi $2, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -189,20 +189,20 @@ define zeroext i8 @f32tou8(float %a) { ; MIPS32-LABEL: f32tou8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: trunc.w.s $f0, $f12 -; MIPS32-NEXT: mfc1 $1, $f0 -; MIPS32-NEXT: lui $2, 20224 -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: mfc1 $2, $f0 +; MIPS32-NEXT: lui $1, 20224 +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: sub.s $f1, $f12, $f0 ; MIPS32-NEXT: trunc.w.s $f1, $f1 -; MIPS32-NEXT: mfc1 $2, $f1 +; MIPS32-NEXT: mfc1 $1, $f1 ; MIPS32-NEXT: lui $3, 32768 -; MIPS32-NEXT: xor $2, $2, $3 +; MIPS32-NEXT: xor $1, $1, $3 ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 ; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 -; MIPS32-NEXT: andi $2, $2, 255 +; MIPS32-NEXT: movn $1, $2, $3 +; MIPS32-NEXT: andi $2, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -233,10 +233,10 @@ define i32 @f64tou32(double %a) { ; FP32: # %bb.0: # %entry ; FP32-NEXT: trunc.w.d $f0, $f12 ; FP32-NEXT: mfc1 $1, $f0 -; FP32-NEXT: lui $2, 16864 -; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: lui $3, 16864 +; FP32-NEXT: ori $2, $zero, 0 +; FP32-NEXT: mtc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f1 ; FP32-NEXT: sub.d $f2, $f12, $f0 ; FP32-NEXT: trunc.w.d $f2, $f2 ; FP32-NEXT: mfc1 $2, $f2 @@ -254,10 +254,10 @@ define i32 @f64tou32(double %a) { ; FP64: # %bb.0: # %entry ; FP64-NEXT: trunc.w.d $f0, $f12 ; FP64-NEXT: mfc1 $1, $f0 -; FP64-NEXT: lui $2, 16864 -; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: lui $3, 16864 +; FP64-NEXT: ori $2, $zero, 0 +; FP64-NEXT: mtc1 $2, $f0 +; FP64-NEXT: mthc1 $3, $f0 ; FP64-NEXT: sub.d $f1, $f12, $f0 ; FP64-NEXT: trunc.w.d $f1, $f1 ; FP64-NEXT: mfc1 $2, $f1 @@ -279,44 +279,44 @@ define zeroext i16 @f64tou16(double %a) { ; FP32-LABEL: f64tou16: ; FP32: # %bb.0: # %entry ; FP32-NEXT: trunc.w.d $f0, $f12 -; FP32-NEXT: mfc1 $1, $f0 -; FP32-NEXT: lui $2, 16864 -; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: lui $3, 16864 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f0 +; FP32-NEXT: mtc1 $3, $f1 ; FP32-NEXT: sub.d $f2, $f12, $f0 ; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mfc1 $1, $f2 ; FP32-NEXT: lui $3, 32768 -; FP32-NEXT: xor $2, $2, $3 +; FP32-NEXT: xor $1, $1, $3 ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 -; FP32-NEXT: movn $2, $1, $3 -; FP32-NEXT: andi $2, $2, 65535 +; FP32-NEXT: movn $1, $2, $3 +; FP32-NEXT: andi $2, $1, 65535 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop ; ; FP64-LABEL: f64tou16: ; FP64: # %bb.0: # %entry ; FP64-NEXT: trunc.w.d $f0, $f12 -; FP64-NEXT: mfc1 $1, $f0 -; FP64-NEXT: lui $2, 16864 -; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: lui $3, 16864 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f0 +; FP64-NEXT: mthc1 $3, $f0 ; FP64-NEXT: sub.d $f1, $f12, $f0 ; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mfc1 $1, $f1 ; FP64-NEXT: lui $3, 32768 -; FP64-NEXT: xor $2, $2, $3 +; FP64-NEXT: xor $1, $1, $3 ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 -; FP64-NEXT: movn $2, $1, $3 -; FP64-NEXT: andi $2, $2, 65535 +; FP64-NEXT: movn $1, $2, $3 +; FP64-NEXT: andi $2, $1, 65535 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop entry: @@ -328,44 +328,44 @@ define zeroext i8 @f64tou8(double %a) { ; FP32-LABEL: f64tou8: ; FP32: # %bb.0: # %entry ; FP32-NEXT: trunc.w.d $f0, $f12 -; FP32-NEXT: mfc1 $1, $f0 -; FP32-NEXT: lui $2, 16864 -; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: lui $3, 16864 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f0 +; FP32-NEXT: mtc1 $3, $f1 ; FP32-NEXT: sub.d $f2, $f12, $f0 ; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mfc1 $1, $f2 ; FP32-NEXT: lui $3, 32768 -; FP32-NEXT: xor $2, $2, $3 +; FP32-NEXT: xor $1, $1, $3 ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 -; FP32-NEXT: movn $2, $1, $3 -; FP32-NEXT: andi $2, $2, 255 +; FP32-NEXT: movn $1, $2, $3 +; FP32-NEXT: andi $2, $1, 255 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop ; ; FP64-LABEL: f64tou8: ; FP64: # %bb.0: # %entry ; FP64-NEXT: trunc.w.d $f0, $f12 -; FP64-NEXT: mfc1 $1, $f0 -; FP64-NEXT: lui $2, 16864 -; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: lui $3, 16864 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f0 +; FP64-NEXT: mthc1 $3, $f0 ; FP64-NEXT: sub.d $f1, $f12, $f0 ; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mfc1 $1, $f1 ; FP64-NEXT: lui $3, 32768 -; FP64-NEXT: xor $2, $2, $3 +; FP64-NEXT: xor $1, $1, $3 ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 -; FP64-NEXT: movn $2, $1, $3 -; FP64-NEXT: andi $2, $2, 255 +; FP64-NEXT: movn $1, $2, $3 +; FP64-NEXT: andi $2, $1, 255 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll index 6e7e56aaa1bac..a23ab7c3ca8f7 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll @@ -14,12 +14,11 @@ define i32 @main() { ; MIPS32-NEXT: addiu $4, $1, %lo($.str) ; MIPS32-NEXT: lui $1, 18838 ; MIPS32-NEXT: ori $5, $1, 722 -; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal printf ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll index e293a565fc707..8e8ca91eb9de4 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll @@ -23,9 +23,8 @@ define i32 @call_global(i32 %a, i32 %b) { ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $25, %call16(f)($1) -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: addu $gp, $2, $25 +; MIPS32_PIC-NEXT: lw $25, %call16(f)($gp) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload @@ -46,10 +45,9 @@ define i32 @call_global_with_local_linkage(i32 %a, i32 %b) { ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $2, %got(f_with_local_linkage)($1) -; MIPS32_PIC-NEXT: addiu $25, $2, %lo(f_with_local_linkage) -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: addu $gp, $2, $25 +; MIPS32_PIC-NEXT: lw $1, %got(f_with_local_linkage)($gp) +; MIPS32_PIC-NEXT: addiu $25, $1, %lo(f_with_local_linkage) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll index 7eb952b47c560..a7e0d05544be8 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll @@ -188,13 +188,12 @@ entry: define i1 @sgt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: sgt_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $7, $5 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: slt $2, $7, $5 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -206,14 +205,13 @@ define i1 @sge_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: sge_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $5, $7 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -224,13 +222,12 @@ entry: define i1 @slt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: slt_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $5, $7 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: slt $2, $5, $7 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -242,14 +239,13 @@ define i1 @sle_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: sle_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $7, $5 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -260,13 +256,12 @@ entry: define i1 @ugt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ugt_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $1, $7, $5 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: sltu $2, $7, $5 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -278,14 +273,13 @@ define i1 @uge_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: uge_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $5, $7 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -296,13 +290,12 @@ entry: define i1 @ult_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ult_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $1, $5, $7 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: sltu $2, $5, $7 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -314,14 +307,13 @@ define i1 @ule_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ule_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $7, $5 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll index dcd6c76a8b2a9..804a14853bedb 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll @@ -7,35 +7,35 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -32 ; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: ori $1, $zero, 7 ; MIPS32-NEXT: ori $2, $zero, 3 -; MIPS32-NEXT: ori $3, $zero, 2 -; MIPS32-NEXT: ori $5, $zero, 1 -; MIPS32-NEXT: ori $6, $zero, 0 -; MIPS32-NEXT: addiu $7, $zero, 65535 -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: subu $8, $4, $8 -; MIPS32-NEXT: sltu $1, $1, $8 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: sw $4, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 2 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 1 +; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $2, $zero, 65535 ; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: subu $2, $4, $2 +; MIPS32-NEXT: sw $2, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sltu $1, $1, $2 +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: bnez $1, $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_1: # %entry +; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lui $1, %hi($JTI0_0) -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sll $3, $2, 2 -; MIPS32-NEXT: addu $1, $1, $3 +; MIPS32-NEXT: sll $2, $2, 2 +; MIPS32-NEXT: addu $1, $1, $2 ; MIPS32-NEXT: lw $1, %lo($JTI0_0)($1) ; MIPS32-NEXT: jr $1 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %sw.bb -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -45,37 +45,37 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_4: # %sw.bb2 -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_5: # %sw.bb3 -; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_6: # %sw.default ; MIPS32-NEXT: .insn ; MIPS32-NEXT: # %bb.7: # %sw.epilog -; MIPS32-NEXT: ori $1, $zero, 8 -; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: subu $1, $2, $1 -; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sltu $4, $3, $1 -; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB0_13 +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ori $3, $zero, 8 +; MIPS32-NEXT: subu $2, $2, $3 +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sltu $1, $1, $2 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_13 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %sw.epilog -; MIPS32-NEXT: lui $1, %hi($JTI0_1) ; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sll $3, $2, 2 -; MIPS32-NEXT: addu $1, $1, $3 +; MIPS32-NEXT: lui $1, %hi($JTI0_1) +; MIPS32-NEXT: sll $2, $2, 2 +; MIPS32-NEXT: addu $1, $1, $2 ; MIPS32-NEXT: lw $1, %lo($JTI0_1)($1) ; MIPS32-NEXT: jr $1 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_9: # %sw.bb4 -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -85,35 +85,20 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_11: # %sw.bb6 -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_12: # %sw.bb7 -; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_13: # %sw.default8 -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32: $JTI0_0: -; MIPS32-NEXT: .4byte ($BB0_2) -; MIPS32-NEXT: .4byte ($BB0_3) -; MIPS32-NEXT: .4byte ($BB0_4) -; MIPS32-NEXT: .4byte ($BB0_5) -; MIPS32-NEXT: .4byte ($BB0_2) -; MIPS32-NEXT: .4byte ($BB0_3) -; MIPS32-NEXT: .4byte ($BB0_4) -; MIPS32-NEXT: .4byte ($BB0_5) -; MIPS32-NEXT: $JTI0_1: -; MIPS32-NEXT: .4byte ($BB0_9) -; MIPS32-NEXT: .4byte ($BB0_10) -; MIPS32-NEXT: .4byte ($BB0_11) -; MIPS32-NEXT: .4byte ($BB0_12) - ; ; MIPS32_PIC-LABEL: mod4_0_to_11: ; MIPS32_PIC: # %bb.0: # %entry @@ -122,117 +107,104 @@ define i32 @mod4_0_to_11(i32 %a) { ; MIPS32_PIC-NEXT: addiu $sp, $sp, -40 ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 40 ; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: ori $2, $zero, 7 -; MIPS32_PIC-NEXT: ori $3, $zero, 3 -; MIPS32_PIC-NEXT: ori $5, $zero, 2 -; MIPS32_PIC-NEXT: ori $6, $zero, 1 -; MIPS32_PIC-NEXT: ori $7, $zero, 0 -; MIPS32_PIC-NEXT: addiu $8, $zero, 65535 -; MIPS32_PIC-NEXT: ori $9, $zero, 0 -; MIPS32_PIC-NEXT: subu $9, $4, $9 -; MIPS32_PIC-NEXT: sltu $2, $2, $9 -; MIPS32_PIC-NEXT: andi $2, $2, 1 -; MIPS32_PIC-NEXT: sw $1, 36($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $4, 32($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $3, 28($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $5, 24($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $6, 20($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $7, 16($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $8, 12($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $9, 8($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: bnez $2, $BB0_6 +; MIPS32_PIC-NEXT: sw $1, 8($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: sw $4, 12($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $1, $zero, 7 +; MIPS32_PIC-NEXT: ori $2, $zero, 3 +; MIPS32_PIC-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 2 +; MIPS32_PIC-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 1 +; MIPS32_PIC-NEXT: sw $2, 24($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 0 +; MIPS32_PIC-NEXT: sw $2, 28($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: addiu $2, $zero, 65535 +; MIPS32_PIC-NEXT: sw $2, 32($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 0 +; MIPS32_PIC-NEXT: subu $2, $4, $2 +; MIPS32_PIC-NEXT: sw $2, 36($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: sltu $1, $1, $2 +; MIPS32_PIC-NEXT: andi $1, $1, 1 +; MIPS32_PIC-NEXT: bnez $1, $BB0_6 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_1: # %entry -; MIPS32_PIC-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: lw $2, %got($JTI0_0)($1) -; MIPS32_PIC-NEXT: lw $3, 8($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: sll $4, $3, 2 -; MIPS32_PIC-NEXT: addu $2, $2, $4 -; MIPS32_PIC-NEXT: lw $2, %lo($JTI0_0)($2) -; MIPS32_PIC-NEXT: addu $2, $2, $1 -; MIPS32_PIC-NEXT: jr $2 +; MIPS32_PIC-NEXT: lw $2, 8($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $3, 36($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $1, %got($JTI0_0)($2) +; MIPS32_PIC-NEXT: sll $3, $3, 2 +; MIPS32_PIC-NEXT: addu $1, $1, $3 +; MIPS32_PIC-NEXT: lw $1, %lo($JTI0_0)($1) +; MIPS32_PIC-NEXT: addu $1, $1, $2 +; MIPS32_PIC-NEXT: jr $1 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_2: # %sw.bb -; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_3: # %sw.bb1 -; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_4: # %sw.bb2 -; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_5: # %sw.bb3 -; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_6: # %sw.default ; MIPS32_PIC-NEXT: .insn ; MIPS32_PIC-NEXT: # %bb.7: # %sw.epilog -; MIPS32_PIC-NEXT: ori $1, $zero, 8 -; MIPS32_PIC-NEXT: lw $2, 32($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: subu $1, $2, $1 -; MIPS32_PIC-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: sltu $4, $3, $1 -; MIPS32_PIC-NEXT: andi $4, $4, 1 -; MIPS32_PIC-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: bnez $4, $BB0_13 +; MIPS32_PIC-NEXT: lw $1, 16($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: ori $3, $zero, 8 +; MIPS32_PIC-NEXT: subu $2, $2, $3 +; MIPS32_PIC-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: sltu $1, $1, $2 +; MIPS32_PIC-NEXT: andi $1, $1, 1 +; MIPS32_PIC-NEXT: bnez $1, $BB0_13 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_8: # %sw.epilog -; MIPS32_PIC-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: lw $2, %got($JTI0_1)($1) +; MIPS32_PIC-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: sll $4, $3, 2 -; MIPS32_PIC-NEXT: addu $2, $2, $4 -; MIPS32_PIC-NEXT: lw $2, %lo($JTI0_1)($2) -; MIPS32_PIC-NEXT: addu $2, $2, $1 -; MIPS32_PIC-NEXT: jr $2 +; MIPS32_PIC-NEXT: lw $1, %got($JTI0_1)($2) +; MIPS32_PIC-NEXT: sll $3, $3, 2 +; MIPS32_PIC-NEXT: addu $1, $1, $3 +; MIPS32_PIC-NEXT: lw $1, %lo($JTI0_1)($1) +; MIPS32_PIC-NEXT: addu $1, $1, $2 +; MIPS32_PIC-NEXT: jr $1 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_9: # %sw.bb4 -; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_10: # %sw.bb5 -; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_11: # %sw.bb6 -; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_12: # %sw.bb7 -; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_13: # %sw.default8 -; MIPS32_PIC-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 32($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop -; MIPS32_PIC: $JTI0_0: -; MIPS32_PIC-NEXT: .gpword ($BB0_2) -; MIPS32_PIC-NEXT: .gpword ($BB0_3) -; MIPS32_PIC-NEXT: .gpword ($BB0_4) -; MIPS32_PIC-NEXT: .gpword ($BB0_5) -; MIPS32_PIC-NEXT: .gpword ($BB0_2) -; MIPS32_PIC-NEXT: .gpword ($BB0_3) -; MIPS32_PIC-NEXT: .gpword ($BB0_4) -; MIPS32_PIC-NEXT: .gpword ($BB0_5) -; MIPS32_PIC-NEXT: $JTI0_1: -; MIPS32_PIC-NEXT: .gpword ($BB0_9) -; MIPS32_PIC-NEXT: .gpword ($BB0_10) -; MIPS32_PIC-NEXT: .gpword ($BB0_11) -; MIPS32_PIC-NEXT: .gpword ($BB0_12) + entry: switch i32 %a, label %sw.default [ diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll index 318407d619f5a..90043c0e9a122 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll @@ -15,11 +15,11 @@ define float @load_float_align1() { ; MIPS32-LABEL: load_float_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align1) -; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: addiu $2, $1, %lo(float_align1) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($2) +; MIPS32-NEXT: lwr $1, 0($2) +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -38,11 +38,11 @@ define float @load_float_align2() { ; MIPS32-LABEL: load_float_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align2) -; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: addiu $2, $1, %lo(float_align2) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($2) +; MIPS32-NEXT: lwr $1, 0($2) +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll index c7a70d56f8a02..a2afbf1c637ec 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll @@ -131,25 +131,23 @@ entry: define i64 @load5align1(%struct.MemSize5_Align1* %S) { ; MIPS32-LABEL: load5align1: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: # implicit-def: $v0 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align1: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align1* %S to i40* @@ -161,25 +159,23 @@ entry: define i64 @load5align2(%struct.MemSize5_Align2* %S) { ; MIPS32-LABEL: load5align2: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: # implicit-def: $v0 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align2: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align2* %S to i40* @@ -191,23 +187,21 @@ entry: define i64 @load5align4(%struct.MemSize5_Align4* %S) { ; MIPS32-LABEL: load5align4: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align4: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align4* %S to i40* @@ -219,23 +213,21 @@ entry: define i64 @load5align8(%struct.MemSize5_Align8* %S) { ; MIPS32-LABEL: load5align8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align8: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align8* %S to i40* @@ -247,27 +239,25 @@ entry: define i64 @load6align1(%struct.MemSize6_Align1* %S) { ; MIPS32-LABEL: load6align1: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 7($4) -; MIPS32-NEXT: lwr $2, 4($4) +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 7($4) +; MIPS32-NEXT: lwr $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align1: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align1* %S to i48* @@ -279,25 +269,23 @@ entry: define i64 @load6align2(%struct.MemSize6_Align2* %S) { ; MIPS32-LABEL: load6align2: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) -; MIPS32-NEXT: lhu $2, 4($4) +; MIPS32-NEXT: # implicit-def: $v0 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: lhu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align2: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align2* %S to i48* @@ -309,23 +297,21 @@ entry: define i64 @load6align4(%struct.MemSize6_Align4* %S) { ; MIPS32-LABEL: load6align4: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lhu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lhu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align4: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align4* %S to i48* @@ -337,23 +323,21 @@ entry: define i64 @load6align8(%struct.MemSize6_Align8* %S) { ; MIPS32-LABEL: load6align8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lhu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lhu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align8: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align8* %S to i48* @@ -365,31 +349,29 @@ entry: define i64 @load7align1(%struct.MemSize7_Align1* %S) { ; MIPS32-LABEL: load7align1: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 7($4) -; MIPS32-NEXT: lwr $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 7($4) +; MIPS32-NEXT: lwr $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align1: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align1* %S to i56* @@ -401,31 +383,29 @@ entry: define i64 @load7align2(%struct.MemSize7_Align2* %S) { ; MIPS32-LABEL: load7align2: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 7($4) -; MIPS32-NEXT: lwr $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 7($4) +; MIPS32-NEXT: lwr $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align2: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align2* %S to i56* @@ -437,27 +417,25 @@ entry: define i64 @load7align4(%struct.MemSize7_Align4* %S) { ; MIPS32-LABEL: load7align4: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lw $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align4: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align4* %S to i56* @@ -469,27 +447,25 @@ entry: define i64 @load7align8(%struct.MemSize7_Align8* %S) { ; MIPS32-LABEL: load7align8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lw $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align8: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align8* %S to i56* @@ -502,15 +478,15 @@ define double @load_double_align1() { ; MIPS32-LABEL: load_double_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align1) +; MIPS32-NEXT: addiu $3, $1, %lo(double_align1) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($3) +; MIPS32-NEXT: lwr $1, 0($3) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: # implicit-def: $v1 -; MIPS32-NEXT: lwl $3, 7($1) -; MIPS32-NEXT: lwr $3, 4($1) -; MIPS32-NEXT: mtc1 $2, $f0 -; MIPS32-NEXT: mtc1 $3, $f1 +; MIPS32-NEXT: lwl $2, 7($3) +; MIPS32-NEXT: lwr $2, 4($3) +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -529,15 +505,15 @@ define double @load_double_align2() { ; MIPS32-LABEL: load_double_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align2) +; MIPS32-NEXT: addiu $3, $1, %lo(double_align2) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($3) +; MIPS32-NEXT: lwr $1, 0($3) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: # implicit-def: $v1 -; MIPS32-NEXT: lwl $3, 7($1) -; MIPS32-NEXT: lwr $3, 4($1) -; MIPS32-NEXT: mtc1 $2, $f0 -; MIPS32-NEXT: mtc1 $3, $f1 +; MIPS32-NEXT: lwl $2, 7($3) +; MIPS32-NEXT: lwr $2, 4($3) +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -556,11 +532,11 @@ define double @load_double_align4() { ; MIPS32-LABEL: load_double_align4: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align4) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align4) -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $1, 4($1) -; MIPS32-NEXT: mtc1 $2, $f0 -; MIPS32-NEXT: mtc1 $1, $f1 +; MIPS32-NEXT: addiu $2, $1, %lo(double_align4) +; MIPS32-NEXT: lw $1, 0($2) +; MIPS32-NEXT: lw $2, 4($2) +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll index 2dcc174860c10..ce46bed175d65 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll @@ -6,126 +6,124 @@ define void @long_chain_ambiguous_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -48 ; MIPS32-NEXT: .cfi_def_cfa_offset 48 +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 64 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 68 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 72 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 68 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 72 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB0_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB0_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_8 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB0_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_11 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB0_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_14 +; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB0_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_19 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB0_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: lw $4, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $5, $4, 1 -; MIPS32-NEXT: move $6, $2 -; MIPS32-NEXT: movn $6, $1, $5 -; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $6, 0($1) -; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: movn $4, $1, $5 +; MIPS32-NEXT: andi $5, $3, 1 +; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $3, $4, $5 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -193,132 +191,130 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* % ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -56 ; MIPS32-NEXT: .cfi_def_cfa_offset 56 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 72 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 76 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 80 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: andi $9, $4, 1 +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 76 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 80 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 ; MIPS32-NEXT: sw $1, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB1_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_7 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_8 +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB1_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: lw $5, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_11 +; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_14 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB1_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: move $5, $1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_19 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: move $3, $2 +; MIPS32-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB1_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: lw $4, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $5, $4, 1 -; MIPS32-NEXT: move $6, $2 -; MIPS32-NEXT: movn $6, $1, $5 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $6, 0($1) -; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: movn $4, $1, $5 +; MIPS32-NEXT: andi $5, $3, 1 +; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $3, $4, $5 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -385,126 +381,124 @@ define void @long_chain_ambiguous_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, flo ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -48 ; MIPS32-NEXT: .cfi_def_cfa_offset 48 +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 64 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 68 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 72 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 68 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 72 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB2_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB2_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB2_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB2_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_11 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB2_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_14 +; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB2_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_19 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: lw $4, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $5, $4, 1 -; MIPS32-NEXT: move $6, $2 -; MIPS32-NEXT: movn $6, $1, $5 -; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $6, 0($1) -; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: movn $4, $1, $5 +; MIPS32-NEXT: andi $5, $3, 1 +; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $3, $4, $5 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -572,40 +566,40 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -56 ; MIPS32-NEXT: .cfi_def_cfa_offset 56 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 72 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 76 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 80 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: mtc1 $8, $f0 -; MIPS32-NEXT: andi $8, $4, 1 -; MIPS32-NEXT: sw $1, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: swc1 $f0, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 76 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 80 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: swc1 $f0, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB3_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_7 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB3_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB3_6 @@ -617,40 +611,39 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB3_9: # %b.PHI.1 -; MIPS32-NEXT: lwc1 $f0, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.s $f1, $f0 -; MIPS32-NEXT: lwc1 $f2, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lwc1 $f0, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f1, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: swc1 $f1, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end -; MIPS32-NEXT: lwc1 $f0, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_14 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB3_15 @@ -662,43 +655,42 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl ; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB3_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.s $f1, $f0 -; MIPS32-NEXT: mov.s $f2, $f0 ; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: mov.s $f1, $f0 ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: lwc1 $f0, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_19: # %b.PHI.3 -; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lwc1 $f1, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.s $f0, $f1, $2 -; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.s $f2, $f1 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f0, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f2, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn.s $f2, $f0, $3 -; MIPS32-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: swc1 $f2, 0($3) -; MIPS32-NEXT: swc1 $f1, 0($3) +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.s $f1, $f0 +; MIPS32-NEXT: movn.s $f1, $f2, $2 +; MIPS32-NEXT: swc1 $f1, 0($1) +; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll index bafa309df76a1..2a5afd5b1022b 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll @@ -6,126 +6,124 @@ define void @long_chain_ambiguous_i64_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -72 ; MIPS32-NEXT: .cfi_def_cfa_offset 72 +; MIPS32-NEXT: sw $4, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 56($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 88 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 92 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 96 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 92 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 96 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB0_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB0_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_8 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB0_9: # %b.PHI.1 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_11 +; MIPS32-NEXT: bnez $1, $BB0_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end -; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_14 +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB0_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_19 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB0_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_19: # %b.PHI.3 -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.d $f0, $f2, $2 -; MIPS32-NEXT: lw $2, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.d $f4, $f2 +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload +; MIPS32-NEXT: mov.d $f4, $f0 +; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn.d $f4, $f0, $3 -; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f4, 0($3) -; MIPS32-NEXT: sdc1 $f2, 0($3) +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: movn.d $f2, $f4, $2 +; MIPS32-NEXT: sdc1 $f2, 0($1) +; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -193,39 +191,39 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -80 ; MIPS32-NEXT: .cfi_def_cfa_offset 80 +; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 60($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 96 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 100 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 104 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: andi $9, $4, 1 +; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 100 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 104 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 72($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 ; MIPS32-NEXT: sw $1, 76($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 72($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 56($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB1_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_7 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_8 +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB1_6 @@ -233,58 +231,56 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: move $5, $2 -; MIPS32-NEXT: move $6, $1 -; MIPS32-NEXT: lw $7, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $8, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_11 +; MIPS32-NEXT: lw $2, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $3, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $3, $2 +; MIPS32-NEXT: sw $3, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 0($2) -; MIPS32-NEXT: lw $3, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $3, 4($2) +; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_14 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB1_15 @@ -292,66 +288,64 @@ define void @long_chain_i64_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i64* %a, i64* % ; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 68($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: move $5, $2 -; MIPS32-NEXT: move $6, $1 -; MIPS32-NEXT: move $7, $2 -; MIPS32-NEXT: move $8, $1 -; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_19 +; MIPS32-NEXT: sw $3, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: move $4, $3 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $4, $2 +; MIPS32-NEXT: sw $4, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $3, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB1_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 0($2) +; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $3, 4($2) +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $4, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $5, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $6, $5, 1 -; MIPS32-NEXT: movn $2, $4, $6 -; MIPS32-NEXT: movn $1, $3, $6 -; MIPS32-NEXT: lw $6, 68($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $7, $6, 1 -; MIPS32-NEXT: move $8, $4 -; MIPS32-NEXT: movn $8, $2, $7 -; MIPS32-NEXT: move $2, $3 -; MIPS32-NEXT: movn $2, $1, $7 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $8, 0($1) -; MIPS32-NEXT: sw $2, 4($1) -; MIPS32-NEXT: sw $4, 0($1) -; MIPS32-NEXT: sw $3, 4($1) +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $6, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $7, $7, 1 +; MIPS32-NEXT: movn $4, $3, $7 +; MIPS32-NEXT: movn $6, $1, $7 +; MIPS32-NEXT: andi $7, $5, 1 +; MIPS32-NEXT: move $5, $3 +; MIPS32-NEXT: movn $5, $4, $7 +; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: movn $4, $6, $7 +; MIPS32-NEXT: sw $5, 0($2) +; MIPS32-NEXT: sw $4, 4($2) +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -418,126 +412,124 @@ define void @long_chain_ambiguous_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, do ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -72 ; MIPS32-NEXT: .cfi_def_cfa_offset 72 +; MIPS32-NEXT: sw $4, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 56($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 88 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 92 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 96 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 92 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 96 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB2_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB2_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB2_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB2_9: # %b.PHI.1 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_11 +; MIPS32-NEXT: bnez $1, $BB2_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end -; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_14 +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB2_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_19 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_19: # %b.PHI.3 -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.d $f0, $f2, $2 -; MIPS32-NEXT: lw $2, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.d $f4, $f2 +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload +; MIPS32-NEXT: mov.d $f4, $f0 +; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn.d $f4, $f0, $3 -; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f4, 0($3) -; MIPS32-NEXT: sdc1 $f2, 0($3) +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: movn.d $f2, $f4, $2 +; MIPS32-NEXT: sdc1 $f2, 0($1) +; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -605,135 +597,133 @@ define void @long_chain_double_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, double* %a, ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -88 ; MIPS32-NEXT: .cfi_def_cfa_offset 88 +; MIPS32-NEXT: sw $4, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 56($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 64($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 104 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 108 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 112 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: ori $9, $zero, 0 -; MIPS32-NEXT: mtc1 $9, $f0 -; MIPS32-NEXT: mtc1 $8, $f1 -; MIPS32-NEXT: andi $8, $4, 1 -; MIPS32-NEXT: sw $1, 84($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 80($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 76($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 72($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sdc1 $f0, 48($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 108 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 72($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 112 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 76($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 +; MIPS32-NEXT: sdc1 $f0, 80($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB3_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_7 +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB3_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB3_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB3_9: # %b.PHI.1 -; MIPS32-NEXT: ldc1 $f0, 40($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 -; MIPS32-NEXT: ldc1 $f4, 48($sp) # 8-byte Folded Reload -; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill +; MIPS32-NEXT: ldc1 $f0, 80($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f2, 40($sp) # 8-byte Folded Reload +; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end -; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 80($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_14 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB3_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB3_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 -; MIPS32-NEXT: mov.d $f4, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_19: # %b.PHI.3 -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: ldc1 $f2, 24($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.d $f0, $f2, $2 -; MIPS32-NEXT: lw $2, 76($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.d $f4, $f2 -; MIPS32-NEXT: movn.d $f4, $f0, $3 +; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f4, 0($3) -; MIPS32-NEXT: sdc1 $f2, 0($3) +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f4, 32($sp) # 8-byte Folded Reload +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn.d $f4, $f0, $3 +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: movn.d $f2, $f4, $2 +; MIPS32-NEXT: sdc1 $f2, 0($1) +; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index f7250ccde898f..d249890bcfeaa 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -86,13 +86,14 @@ entry: define i64 @mul_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: mul_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: mul $2, $6, $4 -; MIPS32-NEXT: mul $1, $7, $4 -; MIPS32-NEXT: mul $3, $6, $5 -; MIPS32-NEXT: multu $6, $4 -; MIPS32-NEXT: mfhi $4 -; MIPS32-NEXT: addu $1, $1, $3 -; MIPS32-NEXT: addu $3, $1, $4 +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: mul $2, $6, $3 +; MIPS32-NEXT: mul $1, $7, $3 +; MIPS32-NEXT: mul $4, $6, $5 +; MIPS32-NEXT: multu $6, $3 +; MIPS32-NEXT: mfhi $3 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: addu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -103,72 +104,73 @@ entry: define i128 @mul_i128(i128 %a, i128 %b) { ; MIPS32-LABEL: mul_i128: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $14, $4 +; MIPS32-NEXT: move $13, $5 +; MIPS32-NEXT: move $12, $6 +; MIPS32-NEXT: move $9, $7 ; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $6, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $7, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 24 +; MIPS32-NEXT: lw $8, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 20 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 24 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: addiu $8, $sp, 28 -; MIPS32-NEXT: lw $8, 0($8) -; MIPS32-NEXT: mul $9, $1, $4 -; MIPS32-NEXT: mul $10, $2, $4 -; MIPS32-NEXT: mul $11, $1, $5 -; MIPS32-NEXT: multu $1, $4 -; MIPS32-NEXT: mfhi $12 -; MIPS32-NEXT: addu $10, $10, $11 -; MIPS32-NEXT: sltu $11, $10, $11 -; MIPS32-NEXT: andi $11, $11, 1 -; MIPS32-NEXT: addu $10, $10, $12 -; MIPS32-NEXT: sltu $12, $10, $12 -; MIPS32-NEXT: andi $12, $12, 1 -; MIPS32-NEXT: addu $11, $11, $12 -; MIPS32-NEXT: mul $12, $3, $4 -; MIPS32-NEXT: mul $13, $2, $5 -; MIPS32-NEXT: mul $14, $1, $6 -; MIPS32-NEXT: multu $2, $4 +; MIPS32-NEXT: mul $2, $6, $14 +; MIPS32-NEXT: mul $3, $7, $14 +; MIPS32-NEXT: mul $4, $6, $13 +; MIPS32-NEXT: multu $6, $14 +; MIPS32-NEXT: mfhi $5 +; MIPS32-NEXT: addu $3, $3, $4 +; MIPS32-NEXT: sltu $4, $3, $4 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: addu $3, $3, $5 +; MIPS32-NEXT: sltu $5, $3, $5 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: addu $10, $4, $5 +; MIPS32-NEXT: mul $4, $8, $14 +; MIPS32-NEXT: mul $5, $7, $13 +; MIPS32-NEXT: mul $24, $6, $12 +; MIPS32-NEXT: multu $7, $14 ; MIPS32-NEXT: mfhi $15 -; MIPS32-NEXT: multu $1, $5 -; MIPS32-NEXT: mfhi $24 -; MIPS32-NEXT: addu $12, $12, $13 -; MIPS32-NEXT: sltu $13, $12, $13 -; MIPS32-NEXT: andi $13, $13, 1 -; MIPS32-NEXT: addu $12, $12, $14 -; MIPS32-NEXT: sltu $14, $12, $14 -; MIPS32-NEXT: andi $14, $14, 1 -; MIPS32-NEXT: addu $13, $13, $14 -; MIPS32-NEXT: addu $12, $12, $15 -; MIPS32-NEXT: sltu $14, $12, $15 -; MIPS32-NEXT: andi $14, $14, 1 -; MIPS32-NEXT: addu $13, $13, $14 -; MIPS32-NEXT: addu $12, $12, $24 -; MIPS32-NEXT: sltu $14, $12, $24 -; MIPS32-NEXT: andi $14, $14, 1 -; MIPS32-NEXT: addu $13, $13, $14 -; MIPS32-NEXT: addu $12, $12, $11 -; MIPS32-NEXT: sltu $11, $12, $11 +; MIPS32-NEXT: multu $6, $13 +; MIPS32-NEXT: mfhi $11 +; MIPS32-NEXT: addu $4, $4, $5 +; MIPS32-NEXT: sltu $5, $4, $5 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: addu $4, $4, $24 +; MIPS32-NEXT: sltu $24, $4, $24 +; MIPS32-NEXT: andi $24, $24, 1 +; MIPS32-NEXT: addu $5, $5, $24 +; MIPS32-NEXT: addu $4, $4, $15 +; MIPS32-NEXT: sltu $15, $4, $15 +; MIPS32-NEXT: andi $15, $15, 1 +; MIPS32-NEXT: addu $5, $5, $15 +; MIPS32-NEXT: addu $4, $4, $11 +; MIPS32-NEXT: sltu $11, $4, $11 ; MIPS32-NEXT: andi $11, $11, 1 -; MIPS32-NEXT: addu $11, $13, $11 -; MIPS32-NEXT: mul $8, $8, $4 -; MIPS32-NEXT: mul $13, $3, $5 -; MIPS32-NEXT: mul $14, $2, $6 -; MIPS32-NEXT: mul $7, $1, $7 -; MIPS32-NEXT: multu $3, $4 -; MIPS32-NEXT: mfhi $3 -; MIPS32-NEXT: multu $2, $5 -; MIPS32-NEXT: mfhi $2 -; MIPS32-NEXT: multu $1, $6 -; MIPS32-NEXT: mfhi $1 -; MIPS32-NEXT: addu $4, $8, $13 -; MIPS32-NEXT: addu $4, $4, $14 -; MIPS32-NEXT: addu $4, $4, $7 -; MIPS32-NEXT: addu $3, $4, $3 -; MIPS32-NEXT: addu $2, $3, $2 -; MIPS32-NEXT: addu $1, $2, $1 -; MIPS32-NEXT: addu $5, $1, $11 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: move $3, $10 -; MIPS32-NEXT: move $4, $12 +; MIPS32-NEXT: addu $5, $5, $11 +; MIPS32-NEXT: addu $4, $4, $10 +; MIPS32-NEXT: sltu $10, $4, $10 +; MIPS32-NEXT: andi $10, $10, 1 +; MIPS32-NEXT: addu $5, $5, $10 +; MIPS32-NEXT: mul $1, $1, $14 +; MIPS32-NEXT: mul $11, $8, $13 +; MIPS32-NEXT: mul $10, $7, $12 +; MIPS32-NEXT: mul $9, $6, $9 +; MIPS32-NEXT: multu $8, $14 +; MIPS32-NEXT: mfhi $8 +; MIPS32-NEXT: multu $7, $13 +; MIPS32-NEXT: mfhi $7 +; MIPS32-NEXT: multu $6, $12 +; MIPS32-NEXT: mfhi $6 +; MIPS32-NEXT: addu $1, $1, $11 +; MIPS32-NEXT: addu $1, $1, $10 +; MIPS32-NEXT: addu $1, $1, $9 +; MIPS32-NEXT: addu $1, $1, $8 +; MIPS32-NEXT: addu $1, $1, $7 +; MIPS32-NEXT: addu $1, $1, $6 +; MIPS32-NEXT: addu $5, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -181,12 +183,12 @@ define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag ; MIPS32-LABEL: umul_with_overflow: ; MIPS32: # %bb.0: ; MIPS32-NEXT: multu $4, $5 -; MIPS32-NEXT: mfhi $1 -; MIPS32-NEXT: mul $2, $4, $5 -; MIPS32-NEXT: sltu $1, $zero, $1 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: sb $1, 0($7) -; MIPS32-NEXT: sw $2, 0($6) +; MIPS32-NEXT: mfhi $2 +; MIPS32-NEXT: mul $1, $4, $5 +; MIPS32-NEXT: sltu $2, $zero, $2 +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: sb $2, 0($7) +; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll index d3f085c239fd0..a71e75958cddf 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll @@ -4,9 +4,9 @@ define void @mul_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) { ; P5600-LABEL: mul_v16i8: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.b $w0, 0($4) -; P5600-NEXT: ld.b $w1, 0($5) -; P5600-NEXT: mulv.b $w0, $w1, $w0 +; P5600-NEXT: ld.b $w1, 0($4) +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: mulv.b $w0, $w0, $w1 ; P5600-NEXT: st.b $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -21,9 +21,9 @@ entry: define void @mul_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) { ; P5600-LABEL: mul_v8i16: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.h $w0, 0($4) -; P5600-NEXT: ld.h $w1, 0($5) -; P5600-NEXT: mulv.h $w0, $w1, $w0 +; P5600-NEXT: ld.h $w1, 0($4) +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: mulv.h $w0, $w0, $w1 ; P5600-NEXT: st.h $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -38,9 +38,9 @@ entry: define void @mul_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) { ; P5600-LABEL: mul_v4i32: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.w $w0, 0($4) -; P5600-NEXT: ld.w $w1, 0($5) -; P5600-NEXT: mulv.w $w0, $w1, $w0 +; P5600-NEXT: ld.w $w1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: mulv.w $w0, $w0, $w1 ; P5600-NEXT: st.w $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -55,9 +55,9 @@ entry: define void @mul_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) { ; P5600-LABEL: mul_v2i64: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.d $w0, 0($4) -; P5600-NEXT: ld.d $w1, 0($5) -; P5600-NEXT: mulv.d $w0, $w1, $w0 +; P5600-NEXT: ld.d $w1, 0($4) +; P5600-NEXT: ld.d $w0, 0($5) +; P5600-NEXT: mulv.d $w0, $w0, $w1 ; P5600-NEXT: st.d $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll index 410c53f987518..d44023bf7f0c8 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll @@ -6,25 +6,24 @@ define i1 @phi_i1(i1 %cnd, i1 %a, i1 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB0_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB0_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB0_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -47,25 +46,24 @@ define i8 @phi_i8(i1 %cnd, i8 %a, i8 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB1_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -88,25 +86,24 @@ define i16 @phi_i16(i1 %cnd, i16 %a, i16 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB2_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB2_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB2_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -129,25 +126,24 @@ define i32 @phi_i32(i1 %cnd, i32 %a, i32 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB3_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB3_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB3_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB3_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -170,36 +166,35 @@ define i64 @phi_i64(i1 %cnd, i64 %a, i64 %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -24 ; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 40 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 44 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: andi $3, $4, 1 +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 44 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB4_2 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB4_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB4_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB4_2: # %cond.true -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB4_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB4_3: # %cond.false ; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB4_4: # %cond.end -; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -222,12 +217,12 @@ define void @phi_ambiguous_i64_in_fpr(i1 %cnd, i64* %i64_ptr_a, i64* %i64_ptr_b, ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -32 ; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: ldc1 $f0, 0($5) -; MIPS32-NEXT: ldc1 $f2, 0($6) -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 8($sp) # 8-byte Folded Spill +; MIPS32-NEXT: ldc1 $f0, 0($6) +; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $4, 1 ; MIPS32-NEXT: bnez $1, $BB5_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry @@ -239,11 +234,11 @@ define void @phi_ambiguous_i64_in_fpr(i1 %cnd, i64* %i64_ptr_a, i64* %i64_ptr_b, ; MIPS32-NEXT: j $BB5_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB5_3: # %cond.false -; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB5_4: # %cond.end +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra @@ -270,21 +265,21 @@ define float @phi_float(i1 %cnd, float %a, float %b) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB6_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB6_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB6_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB6_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB6_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB6_4: # %cond.end ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload @@ -311,28 +306,28 @@ define void @phi_ambiguous_float_in_gpr(i1 %cnd, float* %f32_ptr_a, float* %f32_ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $7, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: lw $1, 0($5) -; MIPS32-NEXT: lw $2, 0($6) -; MIPS32-NEXT: andi $3, $4, 1 +; MIPS32-NEXT: sw $1, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 0($6) ; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB7_2 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB7_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB7_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB7_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB7_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB7_3: # %cond.false -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB7_4: # %cond.end +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra @@ -359,23 +354,23 @@ define double @phi_double(double %a, double %b, i1 %cnd) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -24 ; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sdc1 $f12, 8($sp) # 8-byte Folded Spill +; MIPS32-NEXT: sdc1 $f14, 16($sp) # 8-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 40 ; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: sdc1 $f12, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f14, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB8_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB8_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB8_2: # %cond.true -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB8_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB8_3: # %cond.false -; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB8_4: # %cond.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll index d2520daf6f262..f60bd998b7c88 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll @@ -6,11 +6,11 @@ define signext i8 @sdiv_i8(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: sdiv_i8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 24 +; MIPS32-NEXT: sra $2, $1, 24 +; MIPS32-NEXT: sll $1, $4, 24 ; MIPS32-NEXT: sra $1, $1, 24 -; MIPS32-NEXT: sll $2, $4, 24 -; MIPS32-NEXT: sra $2, $2, 24 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -25,11 +25,11 @@ define signext i16 @sdiv_i16(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: sdiv_i16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 16 +; MIPS32-NEXT: sra $2, $1, 16 +; MIPS32-NEXT: sll $1, $4, 16 ; MIPS32-NEXT: sra $1, $1, 16 -; MIPS32-NEXT: sll $2, $4, 16 -; MIPS32-NEXT: sra $2, $2, 16 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -60,12 +60,12 @@ define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b) { ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __divdi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload @@ -82,11 +82,11 @@ define signext i8 @srem_i8(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: srem_i8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 24 +; MIPS32-NEXT: sra $2, $1, 24 +; MIPS32-NEXT: sll $1, $4, 24 ; MIPS32-NEXT: sra $1, $1, 24 -; MIPS32-NEXT: sll $2, $4, 24 -; MIPS32-NEXT: sra $2, $2, 24 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -101,11 +101,11 @@ define signext i16 @srem_i16(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: srem_i16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 16 +; MIPS32-NEXT: sra $2, $1, 16 +; MIPS32-NEXT: sll $1, $4, 16 ; MIPS32-NEXT: sra $1, $1, 16 -; MIPS32-NEXT: sll $2, $4, 16 -; MIPS32-NEXT: sra $2, $2, 16 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mfhi $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -136,12 +136,12 @@ define signext i64 @srem_i64(i64 signext %a, i64 signext %b) { ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __moddi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload @@ -157,10 +157,10 @@ entry: define signext i8 @udiv_i8(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: udiv_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 255 -; MIPS32-NEXT: andi $2, $4, 255 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 255 +; MIPS32-NEXT: andi $1, $4, 255 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -174,10 +174,10 @@ entry: define signext i16 @udiv_i16(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: udiv_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 65535 -; MIPS32-NEXT: andi $2, $4, 65535 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 65535 +; MIPS32-NEXT: andi $1, $4, 65535 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -208,12 +208,12 @@ define signext i64 @udiv_i64(i64 signext %a, i64 signext %b) { ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __udivdi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload @@ -229,10 +229,10 @@ entry: define signext i8 @urem_i8(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: urem_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 255 -; MIPS32-NEXT: andi $2, $4, 255 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 255 +; MIPS32-NEXT: andi $1, $4, 255 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mfhi $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -246,10 +246,10 @@ entry: define signext i16 @urem_i16(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: urem_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 65535 -; MIPS32-NEXT: andi $2, $4, 65535 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 65535 +; MIPS32-NEXT: andi $1, $4, 65535 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mfhi $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -280,12 +280,12 @@ define signext i64 @urem_i64(i64 signext %a, i64 signext %b) { ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __umoddi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll index 7420a15cad3b0..c292dba16ce36 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll @@ -4,9 +4,9 @@ define i8 @select_i8(i1 %test, i8 %a, i8 %b) { ; MIPS32-LABEL: select_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -17,9 +17,9 @@ entry: define i16 @select_i16(i1 %test, i16 %a, i16 %b) { ; MIPS32-LABEL: select_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -30,9 +30,9 @@ entry: define i32 @select_i32(i1 %test, i32 %a, i32 %b) { ; MIPS32-LABEL: select_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -43,9 +43,9 @@ entry: define i32* @select_ptr(i1 %test, i32* %a, i32* %b) { ; MIPS32-LABEL: select_ptr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -56,12 +56,12 @@ entry: define i32 @select_with_negation(i32 %a, i32 %b, i32 %x, i32 %y) { ; MIPS32-LABEL: select_with_negation: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: slt $2, $4, $5 -; MIPS32-NEXT: xor $1, $2, $1 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $7, $6, $1 ; MIPS32-NEXT: move $2, $7 +; MIPS32-NEXT: ori $3, $zero, 1 +; MIPS32-NEXT: slt $1, $4, $5 +; MIPS32-NEXT: xor $1, $1, $3 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: movn $2, $6, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -74,19 +74,13 @@ entry: define i64 @select_i64(i1 %test, i64 %a, i64 %b) { ; MIPS32-LABEL: select_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $sp, $sp, -8 -; MIPS32-NEXT: .cfi_def_cfa_offset 8 -; MIPS32-NEXT: addiu $1, $sp, 24 -; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 28 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: andi $3, $4, 1 -; MIPS32-NEXT: movn $1, $6, $3 -; MIPS32-NEXT: movn $2, $7, $3 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: move $2, $1 -; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: addiu $sp, $sp, 8 +; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $2, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $3, 0($1) +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $6, $1 +; MIPS32-NEXT: movn $3, $7, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -97,11 +91,11 @@ entry: define void @select_ambiguous_i64_in_fpr(i1 %test, i64* %i64_ptr_a, i64* %i64_ptr_b, i64* %i64_ptr_c) { ; MIPS32-LABEL: select_ambiguous_i64_in_fpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ldc1 $f0, 0($5) -; MIPS32-NEXT: ldc1 $f2, 0($6) +; MIPS32-NEXT: ldc1 $f2, 0($5) +; MIPS32-NEXT: ldc1 $f0, 0($6) ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn.d $f2, $f0, $1 -; MIPS32-NEXT: sdc1 $f2, 0($7) +; MIPS32-NEXT: movn.d $f0, $f2, $1 +; MIPS32-NEXT: sdc1 $f0, 0($7) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -116,10 +110,9 @@ define float @select_float(i1 %test, float %a, float %b) { ; MIPS32-LABEL: select_float: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: mtc1 $5, $f0 -; MIPS32-NEXT: mtc1 $6, $f1 -; MIPS32-NEXT: movn.s $f1, $f0, $1 -; MIPS32-NEXT: mov.s $f0, $f1 +; MIPS32-NEXT: mtc1 $5, $f1 +; MIPS32-NEXT: mtc1 $6, $f0 +; MIPS32-NEXT: movn.s $f0, $f1, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -130,11 +123,11 @@ entry: define void @select_ambiguous_float_in_gpr(i1 %test, float* %f32_ptr_a, float* %f32_ptr_b, float* %f32_ptr_c) { ; MIPS32-LABEL: select_ambiguous_float_in_gpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($5) -; MIPS32-NEXT: lw $2, 0($6) +; MIPS32-NEXT: lw $2, 0($5) +; MIPS32-NEXT: lw $1, 0($6) ; MIPS32-NEXT: andi $3, $4, 1 -; MIPS32-NEXT: movn $2, $1, $3 -; MIPS32-NEXT: sw $2, 0($7) +; MIPS32-NEXT: movn $1, $2, $3 +; MIPS32-NEXT: sw $1, 0($7) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -148,11 +141,11 @@ entry: define double @select_double(double %a, double %b, i1 %test) { ; MIPS32-LABEL: select_double: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mov.d $f0, $f14 ; MIPS32-NEXT: addiu $1, $sp, 16 ; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn.d $f14, $f12, $1 -; MIPS32-NEXT: mov.d $f0, $f14 +; MIPS32-NEXT: movn.d $f0, $f12, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll index 0017f0c0ed08a..07d094604684d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll @@ -143,10 +143,10 @@ define float @u32tof32(i32 zeroext %a) { ; FP32-NEXT: lui $1, 17200 ; FP32-NEXT: mtc1 $4, $f0 ; FP32-NEXT: mtc1 $1, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: cvt.s.d $f0, $f0 ; FP32-NEXT: jr $ra @@ -157,10 +157,10 @@ define float @u32tof32(i32 zeroext %a) { ; FP64-NEXT: lui $1, 17200 ; FP64-NEXT: mtc1 $4, $f0 ; FP64-NEXT: mthc1 $1, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: cvt.s.d $f0, $f0 ; FP64-NEXT: jr $ra @@ -177,10 +177,10 @@ define float @u16tof32(i16 zeroext %a) { ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: cvt.s.d $f0, $f0 ; FP32-NEXT: jr $ra @@ -192,10 +192,10 @@ define float @u16tof32(i16 zeroext %a) { ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: cvt.s.d $f0, $f0 ; FP64-NEXT: jr $ra @@ -212,10 +212,10 @@ define float @u8tof32(i8 zeroext %a) { ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: cvt.s.d $f0, $f0 ; FP32-NEXT: jr $ra @@ -227,10 +227,10 @@ define float @u8tof32(i8 zeroext %a) { ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: cvt.s.d $f0, $f0 ; FP64-NEXT: jr $ra @@ -264,10 +264,10 @@ define double @u32tof64(i32 zeroext %a) { ; FP32-NEXT: lui $1, 17200 ; FP32-NEXT: mtc1 $4, $f0 ; FP32-NEXT: mtc1 $1, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -277,10 +277,10 @@ define double @u32tof64(i32 zeroext %a) { ; FP64-NEXT: lui $1, 17200 ; FP64-NEXT: mtc1 $4, $f0 ; FP64-NEXT: mthc1 $1, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop @@ -296,10 +296,10 @@ define double @u16tof64(i16 zeroext %a) { ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -310,10 +310,10 @@ define double @u16tof64(i16 zeroext %a) { ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop @@ -329,10 +329,10 @@ define double @u8tof64(i8 zeroext %a) { ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -343,10 +343,10 @@ define double @u8tof64(i8 zeroext %a) { ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll index 37c40392e7a02..256655a054694 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll @@ -15,10 +15,10 @@ define void @store_float_align1(float %a) { ; MIPS32-LABEL: store_float_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align1) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) +; MIPS32-NEXT: addiu $2, $1, %lo(float_align1) +; MIPS32-NEXT: mfc1 $1, $f12 +; MIPS32-NEXT: swl $1, 3($2) +; MIPS32-NEXT: swr $1, 0($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -37,10 +37,10 @@ define void @store_float_align2(float %a) { ; MIPS32-LABEL: store_float_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align2) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) +; MIPS32-NEXT: addiu $2, $1, %lo(float_align2) +; MIPS32-NEXT: mfc1 $1, $f12 +; MIPS32-NEXT: swl $1, 3($2) +; MIPS32-NEXT: swr $1, 0($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll index 7d068633a505d..333b24a93684c 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll @@ -204,12 +204,12 @@ define void @store6align1(%struct.MemSize6_Align1* %S, i64 %a) { ; MIPS32-LABEL: store6align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: swl $6, 3($4) ; MIPS32-NEXT: swr $6, 0($4) ; MIPS32-NEXT: sb $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 8 -; MIPS32-NEXT: sb $2, 1($1) +; MIPS32-NEXT: srl $1, $7, 8 +; MIPS32-NEXT: sb $1, 1($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -290,25 +290,25 @@ define void @store7align1(%struct.MemSize7_Align1* %S, i64 %a) { ; MIPS32-LABEL: store7align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: swl $6, 3($4) ; MIPS32-NEXT: swr $6, 0($4) ; MIPS32-NEXT: sb $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 8 -; MIPS32-NEXT: sb $2, 1($1) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 8 +; MIPS32-NEXT: sb $1, 1($2) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align1: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align1* %S to i56* @@ -321,23 +321,23 @@ define void @store7align2(%struct.MemSize7_Align2* %S, i64 %a) { ; MIPS32-LABEL: store7align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: swl $6, 3($4) ; MIPS32-NEXT: swr $6, 0($4) ; MIPS32-NEXT: sh $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align2: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align2* %S to i56* @@ -350,22 +350,22 @@ define void @store7align4(%struct.MemSize7_Align4* %S, i64 %a) { ; MIPS32-LABEL: store7align4: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: sw $6, 0($4) ; MIPS32-NEXT: sh $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align4: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align4* %S to i56* @@ -378,22 +378,22 @@ define void @store7align8(%struct.MemSize7_Align8* %S, i64 %a) { ; MIPS32-LABEL: store7align8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: sw $6, 0($4) ; MIPS32-NEXT: sh $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align8: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align8* %S to i56* @@ -406,13 +406,13 @@ define void @store_double_align1(double %a) { ; MIPS32-LABEL: store_double_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align1) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: mfc1 $3, $f13 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) -; MIPS32-NEXT: swl $3, 7($1) -; MIPS32-NEXT: swr $3, 4($1) +; MIPS32-NEXT: addiu $2, $1, %lo(double_align1) +; MIPS32-NEXT: mfc1 $3, $f12 +; MIPS32-NEXT: mfc1 $1, $f13 +; MIPS32-NEXT: swl $3, 3($2) +; MIPS32-NEXT: swr $3, 0($2) +; MIPS32-NEXT: swl $1, 7($2) +; MIPS32-NEXT: swr $1, 4($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -431,13 +431,13 @@ define void @store_double_align2(double %a) { ; MIPS32-LABEL: store_double_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align2) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: mfc1 $3, $f13 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) -; MIPS32-NEXT: swl $3, 7($1) -; MIPS32-NEXT: swr $3, 4($1) +; MIPS32-NEXT: addiu $2, $1, %lo(double_align2) +; MIPS32-NEXT: mfc1 $3, $f12 +; MIPS32-NEXT: mfc1 $1, $f13 +; MIPS32-NEXT: swl $3, 3($2) +; MIPS32-NEXT: swr $3, 0($2) +; MIPS32-NEXT: swl $1, 7($2) +; MIPS32-NEXT: swr $1, 4($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -456,11 +456,11 @@ define void @store_double_align4(double %a) { ; MIPS32-LABEL: store_double_align4: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align4) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align4) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: mfc1 $3, $f13 -; MIPS32-NEXT: sw $2, 0($1) -; MIPS32-NEXT: sw $3, 4($1) +; MIPS32-NEXT: addiu $2, $1, %lo(double_align4) +; MIPS32-NEXT: mfc1 $3, $f12 +; MIPS32-NEXT: mfc1 $1, $f13 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll index 66dc761c5fa3a..ac98a1be898d2 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll @@ -88,10 +88,10 @@ define i64 @sub_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: sub_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: subu $2, $6, $4 -; MIPS32-NEXT: sltu $1, $6, $4 -; MIPS32-NEXT: subu $3, $7, $5 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: subu $3, $3, $1 +; MIPS32-NEXT: sltu $3, $6, $4 +; MIPS32-NEXT: subu $1, $7, $5 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: subu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -102,38 +102,37 @@ entry: define i128 @sub_i128(i128 %a, i128 %b) { ; MIPS32-LABEL: sub_i128: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $10, $5 +; MIPS32-NEXT: move $9, $6 ; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $3, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $6, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 24 +; MIPS32-NEXT: lw $5, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 20 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 24 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: addiu $8, $sp, 28 -; MIPS32-NEXT: lw $8, 0($8) -; MIPS32-NEXT: subu $9, $1, $4 -; MIPS32-NEXT: sltu $1, $1, $4 -; MIPS32-NEXT: subu $4, $2, $5 -; MIPS32-NEXT: andi $10, $1, 1 -; MIPS32-NEXT: subu $4, $4, $10 -; MIPS32-NEXT: xor $10, $2, $5 -; MIPS32-NEXT: sltiu $10, $10, 1 -; MIPS32-NEXT: sltu $2, $2, $5 -; MIPS32-NEXT: andi $5, $10, 1 -; MIPS32-NEXT: movn $2, $1, $5 -; MIPS32-NEXT: subu $1, $3, $6 -; MIPS32-NEXT: andi $5, $2, 1 -; MIPS32-NEXT: subu $1, $1, $5 -; MIPS32-NEXT: xor $5, $3, $6 -; MIPS32-NEXT: sltiu $5, $5, 1 -; MIPS32-NEXT: sltu $3, $3, $6 +; MIPS32-NEXT: subu $2, $3, $4 +; MIPS32-NEXT: sltu $4, $3, $4 +; MIPS32-NEXT: subu $3, $6, $10 +; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: subu $3, $3, $8 +; MIPS32-NEXT: xor $8, $6, $10 +; MIPS32-NEXT: sltiu $8, $8, 1 +; MIPS32-NEXT: sltu $6, $6, $10 +; MIPS32-NEXT: andi $8, $8, 1 +; MIPS32-NEXT: movn $6, $4, $8 +; MIPS32-NEXT: subu $4, $5, $9 +; MIPS32-NEXT: andi $8, $6, 1 +; MIPS32-NEXT: subu $4, $4, $8 +; MIPS32-NEXT: xor $8, $5, $9 +; MIPS32-NEXT: sltiu $8, $8, 1 +; MIPS32-NEXT: sltu $5, $5, $9 +; MIPS32-NEXT: andi $8, $8, 1 +; MIPS32-NEXT: movn $5, $6, $8 +; MIPS32-NEXT: subu $1, $1, $7 ; MIPS32-NEXT: andi $5, $5, 1 -; MIPS32-NEXT: movn $3, $2, $5 -; MIPS32-NEXT: subu $2, $8, $7 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: subu $5, $2, $3 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: move $3, $4 -; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: subu $5, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll index 6ad041d3a6886..8ce695f073629 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll @@ -4,9 +4,9 @@ define void @sub_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) { ; P5600-LABEL: sub_v16i8: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.b $w0, 0($4) -; P5600-NEXT: ld.b $w1, 0($5) -; P5600-NEXT: subv.b $w0, $w1, $w0 +; P5600-NEXT: ld.b $w1, 0($4) +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: subv.b $w0, $w0, $w1 ; P5600-NEXT: st.b $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -21,9 +21,9 @@ entry: define void @sub_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) { ; P5600-LABEL: sub_v8i16: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.h $w0, 0($4) -; P5600-NEXT: ld.h $w1, 0($5) -; P5600-NEXT: subv.h $w0, $w1, $w0 +; P5600-NEXT: ld.h $w1, 0($4) +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: subv.h $w0, $w0, $w1 ; P5600-NEXT: st.h $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -38,9 +38,9 @@ entry: define void @sub_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) { ; P5600-LABEL: sub_v4i32: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.w $w0, 0($4) -; P5600-NEXT: ld.w $w1, 0($5) -; P5600-NEXT: subv.w $w0, $w1, $w0 +; P5600-NEXT: ld.w $w1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: subv.w $w0, $w0, $w1 ; P5600-NEXT: st.w $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -55,9 +55,9 @@ entry: define void @sub_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) { ; P5600-LABEL: sub_v2i64: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.d $w0, 0($4) -; P5600-NEXT: ld.d $w1, 0($5) -; P5600-NEXT: subv.d $w0, $w1, $w0 +; P5600-NEXT: ld.d $w1, 0($4) +; P5600-NEXT: ld.d $w0, 0($5) +; P5600-NEXT: subv.d $w0, $w0, $w1 ; P5600-NEXT: st.d $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll index 25e87da5ae425..d81e3edf8dd30 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll @@ -26,9 +26,9 @@ entry: define i32 @outgoing_gpr_instr(i32* %i32_ptr1, i32* %i32_ptr2) { ; MIPS32-LABEL: outgoing_gpr_instr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lw $2, 0($5) -; MIPS32-NEXT: addu $2, $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: addu $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -56,10 +56,10 @@ entry: define i32 @incoming_gpr(i32 %incoming_phys_reg, i1 %test, i32* %a) { ; MIPS32-LABEL: incoming_gpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($6) -; MIPS32-NEXT: andi $2, $5, 1 -; MIPS32-NEXT: movn $4, $1, $2 ; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: lw $1, 0($6) +; MIPS32-NEXT: andi $3, $5, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -71,10 +71,10 @@ entry: define float @incoming_fpr(float %incoming_phys_reg, i1 %test, float* %a) { ; MIPS32-LABEL: incoming_fpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lwc1 $f0, 0($6) -; MIPS32-NEXT: andi $1, $5, 1 -; MIPS32-NEXT: movn.s $f12, $f0, $1 ; MIPS32-NEXT: mov.s $f0, $f12 +; MIPS32-NEXT: lwc1 $f1, 0($6) +; MIPS32-NEXT: andi $1, $5, 1 +; MIPS32-NEXT: movn.s $f0, $f1, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -103,11 +103,10 @@ entry: define float @incoming_float_instr(float %val1, float %val2, float* %float_ptr, i1 %test) { ; MIPS32-LABEL: incoming_float_instr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lwc1 $f0, 0($6) -; MIPS32-NEXT: add.s $f1, $f14, $f12 +; MIPS32-NEXT: lwc1 $f1, 0($6) +; MIPS32-NEXT: add.s $f0, $f14, $f12 ; MIPS32-NEXT: andi $1, $7, 1 -; MIPS32-NEXT: movn.s $f1, $f0, $1 -; MIPS32-NEXT: mov.s $f0, $f1 +; MIPS32-NEXT: movn.s $f0, $f1, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll index 91a82f2a7fccc..fa6bf93d45d7e 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll @@ -13,6 +13,7 @@ define void @testVaCopyArg(i8* %fmt, ...) { ; MIPS32-NEXT: .cfi_def_cfa_offset 40 ; MIPS32-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: move $3, $4 ; MIPS32-NEXT: addiu $1, $sp, 44 ; MIPS32-NEXT: sw $5, 0($1) ; MIPS32-NEXT: addiu $1, $sp, 48 @@ -20,24 +21,23 @@ define void @testVaCopyArg(i8* %fmt, ...) { ; MIPS32-NEXT: addiu $1, $sp, 52 ; MIPS32-NEXT: sw $7, 0($1) ; MIPS32-NEXT: lui $1, %hi($.str) -; MIPS32-NEXT: addiu $1, $1, %lo($.str) -; MIPS32-NEXT: addiu $2, $sp, 32 -; MIPS32-NEXT: addiu $3, $sp, 28 +; MIPS32-NEXT: addiu $4, $1, %lo($.str) +; MIPS32-NEXT: addiu $6, $sp, 32 +; MIPS32-NEXT: addiu $2, $sp, 28 ; MIPS32-NEXT: addiu $5, $sp, 24 -; MIPS32-NEXT: addiu $6, $sp, 20 -; MIPS32-NEXT: sw $4, 0($2) -; MIPS32-NEXT: addiu $2, $sp, 44 -; MIPS32-NEXT: sw $2, 0($3) -; MIPS32-NEXT: lw $2, 0($3) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: sw $3, 0($6) +; MIPS32-NEXT: addiu $3, $sp, 44 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: lw $2, 0($2) ; MIPS32-NEXT: sw $2, 0($5) ; MIPS32-NEXT: lw $2, 0($5) ; MIPS32-NEXT: ori $3, $zero, 4 ; MIPS32-NEXT: addu $3, $2, $3 ; MIPS32-NEXT: sw $3, 0($5) ; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: sw $2, 0($6) -; MIPS32-NEXT: lw $5, 0($6) -; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $5, 0($1) ; MIPS32-NEXT: jal printf ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll index ec4252c17dee1..e42e0fe3a6b21 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll @@ -113,9 +113,8 @@ entry: define i64 @load4_s32_to_sextLoad4_s64(i32* %px) { ; MIPS32-LABEL: load4_s32_to_sextLoad4_s64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: sra $3, $1, 31 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: sra $3, $2, 31 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll index 7f54f810b8ddc..1e2954542a064 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll @@ -4,8 +4,8 @@ define i64 @zext(i32 %x) { ; MIPS32-LABEL: zext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $3, $zero, 0 ; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: ori $3, $zero, 0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -16,8 +16,8 @@ entry: define i64 @sext(i32 %x) { ; MIPS32-LABEL: sext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sra $3, $4, 31 ; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: sra $3, $2, 31 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/atomic-min-max.ll b/llvm/test/CodeGen/Mips/atomic-min-max.ll index 646af650c00e7..8fa95e6d5e4d1 100644 --- a/llvm/test/CodeGen/Mips/atomic-min-max.ll +++ b/llvm/test/CodeGen/Mips/atomic-min-max.ll @@ -829,38 +829,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB4_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB4_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB4_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -869,38 +869,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB4_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB4_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB4_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -908,37 +908,37 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB4_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB4_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB4_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -946,38 +946,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB4_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB4_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB4_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -985,39 +985,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB4_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB4_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB4_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -1026,39 +1026,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB4_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB4_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB4_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -1066,38 +1066,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB4_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB4_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB4_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -1105,39 +1105,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB4_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB4_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB4_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -1145,38 +1145,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB4_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB4_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB4_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -1185,38 +1185,38 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB4_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB4_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB4_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -1224,39 +1224,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB4_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB4_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB4_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -1265,39 +1265,39 @@ define i16 @test_max_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB4_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB4_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -1310,38 +1310,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB5_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB5_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB5_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -1350,38 +1350,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB5_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB5_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB5_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -1389,37 +1389,37 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB5_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB5_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB5_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -1427,38 +1427,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB5_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB5_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB5_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -1466,39 +1466,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB5_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB5_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB5_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -1507,39 +1507,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB5_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB5_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB5_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -1547,38 +1547,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB5_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB5_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB5_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -1586,39 +1586,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB5_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB5_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB5_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -1626,38 +1626,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB5_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB5_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB5_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -1666,38 +1666,38 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB5_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB5_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB5_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -1705,39 +1705,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB5_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB5_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB5_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -1746,39 +1746,39 @@ define i16 @test_min_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB5_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB5_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -1791,38 +1791,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB6_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB6_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB6_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -1831,38 +1831,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB6_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB6_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB6_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -1870,37 +1870,37 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB6_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB6_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB6_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -1908,38 +1908,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB6_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB6_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB6_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -1947,39 +1947,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB6_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB6_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB6_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -1988,39 +1988,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB6_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB6_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB6_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -2028,38 +2028,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB6_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB6_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB6_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -2067,39 +2067,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB6_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB6_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB6_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -2107,38 +2107,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB6_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB6_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB6_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -2147,38 +2147,38 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB6_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB6_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB6_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -2186,39 +2186,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB6_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB6_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB6_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -2227,39 +2227,39 @@ define i16 @test_umax_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB6_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB6_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -2272,38 +2272,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB7_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB7_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB7_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -2312,38 +2312,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB7_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB7_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB7_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -2351,37 +2351,37 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB7_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB7_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB7_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -2389,38 +2389,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB7_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB7_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB7_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -2428,39 +2428,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB7_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB7_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB7_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -2469,39 +2469,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB7_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB7_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB7_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -2509,38 +2509,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB7_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB7_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB7_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -2548,39 +2548,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB7_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB7_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB7_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -2588,38 +2588,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB7_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB7_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB7_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -2628,38 +2628,38 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB7_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB7_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB7_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -2667,39 +2667,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB7_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB7_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB7_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -2708,39 +2708,39 @@ define i16 @test_umin_16(i16* nocapture %ptr, i16 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB7_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB7_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -2754,38 +2754,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB8_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB8_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB8_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -2794,38 +2794,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB8_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB8_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB8_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -2833,37 +2833,37 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB8_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB8_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB8_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -2871,38 +2871,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB8_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB8_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB8_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -2910,39 +2910,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB8_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB8_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB8_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -2951,39 +2951,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB8_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB8_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB8_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -2991,38 +2991,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB8_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB8_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB8_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -3030,39 +3030,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB8_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB8_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB8_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -3070,38 +3070,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB8_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB8_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB8_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -3110,38 +3110,38 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB8_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB8_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -3149,39 +3149,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB8_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB8_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB8_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -3190,39 +3190,39 @@ define i8 @test_max_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB8_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB8_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -3235,38 +3235,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB9_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB9_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB9_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -3275,38 +3275,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB9_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB9_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB9_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -3314,37 +3314,37 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB9_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB9_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB9_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -3352,38 +3352,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB9_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB9_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB9_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -3391,39 +3391,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB9_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB9_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB9_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -3432,39 +3432,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB9_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB9_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB9_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -3472,38 +3472,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB9_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB9_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB9_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -3511,39 +3511,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB9_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB9_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB9_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -3551,38 +3551,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB9_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB9_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB9_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -3591,38 +3591,38 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB9_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB9_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -3630,39 +3630,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB9_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB9_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB9_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -3671,39 +3671,39 @@ define i8 @test_min_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB9_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB9_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -3716,38 +3716,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB10_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB10_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB10_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -3756,38 +3756,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB10_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB10_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB10_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -3795,37 +3795,37 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB10_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB10_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB10_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -3833,38 +3833,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB10_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB10_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB10_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -3872,39 +3872,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB10_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB10_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB10_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -3913,39 +3913,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB10_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB10_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB10_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -3953,38 +3953,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB10_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB10_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB10_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -3992,39 +3992,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB10_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB10_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB10_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -4032,38 +4032,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB10_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB10_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB10_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -4072,38 +4072,38 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB10_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB10_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -4111,39 +4111,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB10_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB10_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB10_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -4152,39 +4152,39 @@ define i8 @test_umax_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB10_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB10_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -4197,38 +4197,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB11_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB11_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB11_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -4237,38 +4237,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB11_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB11_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB11_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -4276,37 +4276,37 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB11_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB11_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB11_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -4314,38 +4314,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB11_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB11_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB11_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -4353,39 +4353,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB11_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB11_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB11_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -4394,39 +4394,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB11_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB11_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB11_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -4434,38 +4434,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB11_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB11_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB11_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -4473,39 +4473,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB11_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB11_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB11_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -4513,38 +4513,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB11_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB11_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB11_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -4553,38 +4553,38 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB11_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB11_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -4592,39 +4592,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB11_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB11_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB11_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -4633,39 +4633,39 @@ define i8 @test_umin_8(i8* nocapture %ptr, i8 signext %val) { ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB11_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB11_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll index 59ff83e4969cc..d7cfde7f1c046 100644 --- a/llvm/test/CodeGen/Mips/atomic.ll +++ b/llvm/test/CodeGen/Mips/atomic.ll @@ -57,13 +57,13 @@ define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB0_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: addu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB0_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB0_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -107,13 +107,13 @@ define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB0_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: addu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB0_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB0_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -191,13 +191,13 @@ define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB0_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: addu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -313,13 +313,13 @@ define i32 @AtomicLoadSub32(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB1_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: subu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB1_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: subu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB1_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -363,13 +363,13 @@ define i32 @AtomicLoadSub32(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB1_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: subu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB1_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: subu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB1_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -447,13 +447,13 @@ define i32 @AtomicLoadSub32(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB1_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: subu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: subu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -569,13 +569,13 @@ define i32 @AtomicLoadXor32(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB2_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: xor $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB2_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: xor $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB2_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -619,13 +619,13 @@ define i32 @AtomicLoadXor32(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB2_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: xor $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB2_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: xor $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB2_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -703,13 +703,13 @@ define i32 @AtomicLoadXor32(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB2_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: xor $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -824,13 +824,13 @@ define i32 @AtomicLoadOr32(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB3_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: or $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB3_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: or $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB3_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -874,13 +874,13 @@ define i32 @AtomicLoadOr32(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB3_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: or $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB3_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: or $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB3_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -958,13 +958,13 @@ define i32 @AtomicLoadOr32(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB3_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: or $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1079,13 +1079,13 @@ define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB4_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: and $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB4_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB4_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -1129,13 +1129,13 @@ define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB4_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: and $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB4_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB4_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -1213,13 +1213,13 @@ define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB4_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1335,14 +1335,14 @@ define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB5_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: and $3, $2, $4 -; MIPS32O0-NEXT: nor $3, $zero, $3 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB5_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: nor $1, $zero, $1 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB5_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -1388,14 +1388,14 @@ define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB5_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: and $3, $2, $4 -; MIPS32R6O0-NEXT: nor $3, $zero, $3 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB5_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: nor $1, $zero, $1 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB5_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -1477,14 +1477,14 @@ define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB5_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: nor $3, $zero, $3 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1609,17 +1609,16 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind { ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sw $4, 4($sp) -; MIPS32O0-NEXT: lw $2, 4($sp) -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $4, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB6_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $3, 0($1) -; MIPS32O0-NEXT: move $4, $2 -; MIPS32O0-NEXT: sc $4, 0($1) -; MIPS32O0-NEXT: beqz $4, $BB6_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: move $1, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB6_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $3 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -1669,16 +1668,15 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind { ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sw $4, 4($sp) -; MIPS32R6O0-NEXT: lw $2, 4($sp) -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $4, 4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB6_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $3, 0($1) -; MIPS32R6O0-NEXT: move $4, $2 -; MIPS32R6O0-NEXT: sc $4, 0($1) -; MIPS32R6O0-NEXT: beqzc $4, $BB6_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: move $1, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB6_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $3 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -1764,18 +1762,17 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: sw $4, 12($sp) -; MIPS64R6O0-NEXT: lw $2, 12($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: sw $2, 12($sp) +; MIPS64R6O0-NEXT: lw $4, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB6_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: move $4, $2 -; MIPS64R6O0-NEXT: sc $4, 0($1) -; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -1912,24 +1909,23 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sw $5, 4($sp) -; MIPS32O0-NEXT: lw $2, 4($sp) -; MIPS32O0-NEXT: lw $1, %got(x)($1) -; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: lw $6, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: move $5, $4 ; MIPS32O0-NEXT: $BB7_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $5, 0($1) -; MIPS32O0-NEXT: bne $5, $3, $BB7_3 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: bne $2, $5, $BB7_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS32O0-NEXT: move $6, $2 -; MIPS32O0-NEXT: sc $6, 0($1) -; MIPS32O0-NEXT: beqz $6, $BB7_1 +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB7_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB7_3: # %entry -; MIPS32O0-NEXT: xor $1, $5, $4 +; MIPS32O0-NEXT: xor $1, $2, $4 ; MIPS32O0-NEXT: sltiu $1, $1, 1 -; MIPS32O0-NEXT: move $2, $5 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -1986,19 +1982,18 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sw $5, 4($sp) -; MIPS32R6O0-NEXT: lw $2, 4($sp) -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $5, 4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB7_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $3, 0($1) -; MIPS32R6O0-NEXT: bnec $3, $4, $BB7_3 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS32R6O0-NEXT: move $5, $2 -; MIPS32R6O0-NEXT: sc $5, 0($1) -; MIPS32R6O0-NEXT: beqzc $5, $BB7_1 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB7_1 ; MIPS32R6O0-NEXT: $BB7_3: # %entry -; MIPS32R6O0-NEXT: move $2, $3 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -2100,21 +2095,20 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: sw $5, 12($sp) -; MIPS64R6O0-NEXT: lw $2, 12($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sw $2, 12($sp) +; MIPS64R6O0-NEXT: lw $5, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB7_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS64R6O0-NEXT: move $5, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ; MIPS64R6O0-NEXT: .LBB7_3: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -2286,34 +2280,34 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB8_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: addu $8, $7, $4 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB8_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: addu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB8_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -2387,31 +2381,31 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB8_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: addu $9, $8, $4 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB8_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: addu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB8_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -2554,33 +2548,33 @@ define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -2802,34 +2796,34 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB9_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: subu $8, $7, $4 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB9_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: subu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB9_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -2903,31 +2897,31 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB9_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: subu $9, $8, $4 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB9_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: subu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB9_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -3070,33 +3064,33 @@ define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: subu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: subu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3320,35 +3314,35 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB10_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: and $8, $7, $4 -; MIPS32O0-NEXT: nor $8, $zero, $8 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB10_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $2, $6 +; MIPS32O0-NEXT: nor $3, $zero, $3 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB10_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -3424,32 +3418,32 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB10_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: and $9, $8, $4 -; MIPS32R6O0-NEXT: nor $9, $zero, $9 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB10_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $2, $6 +; MIPS32R6O0-NEXT: nor $3, $zero, $3 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB10_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -3596,34 +3590,34 @@ define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $7, $4 -; MIPS64R6O0-NEXT: nor $8, $zero, $8 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: and $3, $2, $6 +; MIPS64R6O0-NEXT: nor $3, $zero, $3 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3850,33 +3844,33 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB11_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: and $8, $4, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB11_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $6, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB11_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -3948,30 +3942,30 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB11_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: and $9, $4, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB11_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $6, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB11_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -4110,32 +4104,32 @@ define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $4, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: and $3, $6, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4354,42 +4348,44 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(y)($1) -; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 -; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 +; MIPS32O0-NEXT: addu $3, $2, $25 +; MIPS32O0-NEXT: move $1, $5 +; MIPS32O0-NEXT: move $2, $4 +; MIPS32O0-NEXT: lw $3, %got(y)($3) +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 ; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $6, $zero, $3 -; MIPS32O0-NEXT: andi $4, $4, 255 -; MIPS32O0-NEXT: sllv $4, $4, $1 -; MIPS32O0-NEXT: andi $5, $5, 255 -; MIPS32O0-NEXT: sllv $5, $5, $1 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 255 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 255 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB12_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $8, 0($2) -; MIPS32O0-NEXT: and $9, $8, $3 -; MIPS32O0-NEXT: bne $9, $4, $BB12_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB12_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS32O0-NEXT: and $8, $8, $6 -; MIPS32O0-NEXT: or $8, $8, $5 -; MIPS32O0-NEXT: sc $8, 0($2) -; MIPS32O0-NEXT: beqz $8, $BB12_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB12_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB12_3: # %entry -; MIPS32O0-NEXT: srlv $7, $9, $1 -; MIPS32O0-NEXT: sll $7, $7, 24 -; MIPS32O0-NEXT: sra $7, $7, 24 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.4: # %entry -; MIPS32O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -4470,37 +4466,39 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $5 -; MIPS32R6O0-NEXT: move $3, $4 -; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $6, $zero, -4 -; MIPS32R6O0-NEXT: and $6, $1, $6 -; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $7, $zero, 255 -; MIPS32R6O0-NEXT: sllv $7, $7, $1 -; MIPS32R6O0-NEXT: nor $8, $zero, $7 -; MIPS32R6O0-NEXT: andi $4, $4, 255 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 -; MIPS32R6O0-NEXT: andi $5, $5, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 +; MIPS32R6O0-NEXT: addu $3, $2, $25 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $a1 killed $at +; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0 +; MIPS32R6O0-NEXT: lw $3, %got(y)($3) +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 255 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 255 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 255 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB12_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $10, 0($6) -; MIPS32R6O0-NEXT: and $11, $10, $7 -; MIPS32R6O0-NEXT: bnec $11, $4, $BB12_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS32R6O0-NEXT: and $10, $10, $8 -; MIPS32R6O0-NEXT: or $10, $10, $5 -; MIPS32R6O0-NEXT: sc $10, 0($6) -; MIPS32R6O0-NEXT: beqzc $10, $BB12_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB12_1 ; MIPS32R6O0-NEXT: $BB12_3: # %entry -; MIPS32R6O0-NEXT: srlv $9, $11, $1 -; MIPS32R6O0-NEXT: seb $9, $9 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $9, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry ; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 @@ -4660,38 +4658,38 @@ define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwi ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3) +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $6, $zero, $3 -; MIPS64R6O0-NEXT: andi $4, $4, 255 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 -; MIPS64R6O0-NEXT: andi $5, $5, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $1 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 255 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 +; MIPS64R6O0-NEXT: andi $1, $1, 255 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $8, $3 -; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $8, $8, $6 -; MIPS64R6O0-NEXT: or $8, $8, $5 -; MIPS64R6O0-NEXT: sc $8, 0($2) -; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $7, $9, $1 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -4935,44 +4933,47 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS32O0-LABEL: AtomicCmpSwapRes8: ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addiu $1, $zero, -4 -; MIPS32O0-NEXT: and $1, $4, $1 -; MIPS32O0-NEXT: andi $2, $4, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: move $2, $5 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 ; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $2 -; MIPS32O0-NEXT: nor $4, $zero, $3 -; MIPS32O0-NEXT: andi $7, $5, 255 -; MIPS32O0-NEXT: sllv $7, $7, $2 -; MIPS32O0-NEXT: andi $6, $6, 255 -; MIPS32O0-NEXT: sllv $6, $6, $2 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 255 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 255 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB13_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $9, 0($1) -; MIPS32O0-NEXT: and $10, $9, $3 -; MIPS32O0-NEXT: bne $10, $7, $BB13_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB13_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS32O0-NEXT: and $9, $9, $4 -; MIPS32O0-NEXT: or $9, $9, $6 -; MIPS32O0-NEXT: sc $9, 0($1) -; MIPS32O0-NEXT: beqz $9, $BB13_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB13_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB13_3: # %entry -; MIPS32O0-NEXT: srlv $8, $10, $2 -; MIPS32O0-NEXT: sll $8, $8, 24 -; MIPS32O0-NEXT: sra $8, $8, 24 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.4: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 +; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $2, $2, 24 ; MIPS32O0-NEXT: sra $2, $2, 24 -; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: xor $2, $3, $2 -; MIPS32O0-NEXT: sltiu $2, $2, 1 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $1, 1 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -5048,37 +5049,40 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: move $1, $6 ; MIPS32R6O0-NEXT: move $2, $5 -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $4, $3 -; MIPS32R6O0-NEXT: andi $4, $4, 3 -; MIPS32R6O0-NEXT: sll $4, $4, 3 -; MIPS32R6O0-NEXT: ori $7, $zero, 255 -; MIPS32R6O0-NEXT: sllv $7, $7, $4 -; MIPS32R6O0-NEXT: nor $8, $zero, $7 -; MIPS32R6O0-NEXT: andi $9, $5, 255 -; MIPS32R6O0-NEXT: sllv $9, $9, $4 -; MIPS32R6O0-NEXT: andi $6, $6, 255 -; MIPS32R6O0-NEXT: sllv $6, $6, $4 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: move $3, $4 +; MIPS32R6O0-NEXT: # kill: def $a2 killed $at +; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0 +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 255 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 255 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 255 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB13_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $11, 0($3) -; MIPS32R6O0-NEXT: and $12, $11, $7 -; MIPS32R6O0-NEXT: bnec $12, $9, $BB13_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS32R6O0-NEXT: and $11, $11, $8 -; MIPS32R6O0-NEXT: or $11, $11, $6 -; MIPS32R6O0-NEXT: sc $11, 0($3) -; MIPS32R6O0-NEXT: beqzc $11, $BB13_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB13_1 ; MIPS32R6O0-NEXT: $BB13_3: # %entry -; MIPS32R6O0-NEXT: srlv $10, $12, $4 -; MIPS32R6O0-NEXT: seb $10, $10 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $10, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 @@ -5227,40 +5231,41 @@ define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) n ; MIPS64R6O0-LABEL: AtomicCmpSwapRes8: ; MIPS64R6O0: # %bb.0: # %entry ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 -; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64 -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: daddiu $1, $zero, -4 -; MIPS64R6O0-NEXT: and $1, $4, $1 -; MIPS64R6O0-NEXT: andi $2, $4, 3 -; MIPS64R6O0-NEXT: xori $2, $2, 3 -; MIPS64R6O0-NEXT: sll $2, $2, 3 +; MIPS64R6O0-NEXT: move $3, $4 +; MIPS64R6O0-NEXT: move $1, $6 +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $2 -; MIPS64R6O0-NEXT: nor $4, $zero, $3 -; MIPS64R6O0-NEXT: andi $7, $5, 255 -; MIPS64R6O0-NEXT: sllv $7, $7, $2 -; MIPS64R6O0-NEXT: andi $6, $6, 255 -; MIPS64R6O0-NEXT: sllv $6, $6, $2 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 255 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 +; MIPS64R6O0-NEXT: andi $1, $1, 255 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($1) -; MIPS64R6O0-NEXT: and $10, $9, $3 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $4 -; MIPS64R6O0-NEXT: or $9, $9, $6 -; MIPS64R6O0-NEXT: sc $9, 0($1) -; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $8, $10, $2 -; MIPS64R6O0-NEXT: seb $8, $8 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry -; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: xor $1, $1, $2 ; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5502,34 +5507,34 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(z)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 65535 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 65535 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB14_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: addu $8, $7, $4 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB14_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: addu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB14_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 16 -; MIPS32O0-NEXT: sra $6, $6, 16 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 16 -; MIPS32O0-NEXT: sra $2, $2, 16 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $2, $1, 16 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -5603,31 +5608,31 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(z)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 65535 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB14_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: addu $9, $8, $4 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB14_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: addu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB14_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seh $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seh $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seh $2, $1 @@ -5770,33 +5775,33 @@ define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 2 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 2 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seh $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seh $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6025,46 +6030,47 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS32O0: # %bb.0: ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: .cfi_def_cfa_offset 8 -; MIPS32O0-NEXT: addu $1, $5, $6 +; MIPS32O0-NEXT: move $1, $7 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: addu $2, $5, $6 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $4, $2 -; MIPS32O0-NEXT: andi $3, $4, 3 -; MIPS32O0-NEXT: sll $3, $3, 3 -; MIPS32O0-NEXT: ori $4, $zero, 65535 -; MIPS32O0-NEXT: sllv $4, $4, $3 -; MIPS32O0-NEXT: nor $5, $zero, $4 -; MIPS32O0-NEXT: andi $6, $1, 65535 -; MIPS32O0-NEXT: sllv $6, $6, $3 -; MIPS32O0-NEXT: andi $7, $7, 65535 -; MIPS32O0-NEXT: sllv $7, $7, $3 +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 +; MIPS32O0-NEXT: ori $3, $zero, 65535 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 65535 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 65535 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $9, 0($2) -; MIPS32O0-NEXT: and $10, $9, $4 -; MIPS32O0-NEXT: bne $10, $6, $BB15_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB15_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS32O0-NEXT: and $9, $9, $5 -; MIPS32O0-NEXT: or $9, $9, $7 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB15_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB15_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB15_3: -; MIPS32O0-NEXT: srlv $8, $10, $3 -; MIPS32O0-NEXT: sll $8, $8, 16 -; MIPS32O0-NEXT: sra $8, $8, 16 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 ; MIPS32O0-NEXT: # %bb.4: ; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: -; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 16 -; MIPS32O0-NEXT: sra $2, $2, 16 -; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: xor $2, $3, $2 -; MIPS32O0-NEXT: sltiu $3, $2, 1 +; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 +; MIPS32O0-NEXT: xor $1, $2, $1 +; MIPS32O0-NEXT: sltiu $3, $1, 1 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -6145,44 +6151,45 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8 ; MIPS32R6O0-NEXT: move $1, $7 -; MIPS32R6O0-NEXT: move $2, $6 -; MIPS32R6O0-NEXT: move $3, $5 -; MIPS32R6O0-NEXT: addu $5, $5, $6 +; MIPS32R6O0-NEXT: move $3, $4 +; MIPS32R6O0-NEXT: # kill: def $a3 killed $at +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1 +; MIPS32R6O0-NEXT: addu $2, $5, $6 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: addiu $6, $zero, -4 -; MIPS32R6O0-NEXT: and $6, $4, $6 -; MIPS32R6O0-NEXT: andi $4, $4, 3 -; MIPS32R6O0-NEXT: sll $4, $4, 3 -; MIPS32R6O0-NEXT: ori $8, $zero, 65535 -; MIPS32R6O0-NEXT: sllv $8, $8, $4 -; MIPS32R6O0-NEXT: nor $9, $zero, $8 -; MIPS32R6O0-NEXT: andi $10, $5, 65535 -; MIPS32R6O0-NEXT: sllv $10, $10, $4 -; MIPS32R6O0-NEXT: andi $7, $7, 65535 -; MIPS32R6O0-NEXT: sllv $7, $7, $4 +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 65535 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 65535 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $12, 0($6) -; MIPS32R6O0-NEXT: and $13, $12, $8 -; MIPS32R6O0-NEXT: bnec $13, $10, $BB15_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3 ; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS32R6O0-NEXT: and $12, $12, $9 -; MIPS32R6O0-NEXT: or $12, $12, $7 -; MIPS32R6O0-NEXT: sc $12, 0($6) -; MIPS32R6O0-NEXT: beqzc $12, $BB15_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB15_1 ; MIPS32R6O0-NEXT: $BB15_3: -; MIPS32R6O0-NEXT: srlv $11, $13, $4 -; MIPS32R6O0-NEXT: seh $11, $11 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seh $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: -; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: -; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: seh $2, $1 -; MIPS32R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: xor $2, $3, $2 -; MIPS32R6O0-NEXT: sltiu $3, $2, 1 +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: seh $1, $1 +; MIPS32R6O0-NEXT: xor $1, $2, $1 +; MIPS32R6O0-NEXT: sltiu $3, $1, 1 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -6351,49 +6358,49 @@ define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ; MIPS64R6O0: # %bb.0: ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6O0-NEXT: # kill: def $a3 killed $a3 killed $a3_64 -; MIPS64R6O0-NEXT: sll $1, $7, 0 -; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64 -; MIPS64R6O0-NEXT: sll $2, $6, 0 -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: sll $3, $5, 0 -; MIPS64R6O0-NEXT: addu $2, $3, $2 +; MIPS64R6O0-NEXT: move $3, $4 +; MIPS64R6O0-NEXT: move $1, $7 +; MIPS64R6O0-NEXT: sll $1, $1, 0 +; MIPS64R6O0-NEXT: move $2, $6 +; MIPS64R6O0-NEXT: sll $4, $2, 0 +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sll $2, $2, 0 +; MIPS64R6O0-NEXT: addu $2, $2, $4 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 -; MIPS64R6O0-NEXT: and $3, $4, $3 -; MIPS64R6O0-NEXT: andi $4, $4, 3 -; MIPS64R6O0-NEXT: xori $4, $4, 2 -; MIPS64R6O0-NEXT: sll $4, $4, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $4 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: andi $7, $2, 65535 -; MIPS64R6O0-NEXT: sllv $7, $7, $4 +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $9, $3, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 65535 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $4 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($3) -; MIPS64R6O0-NEXT: and $10, $9, $5 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $6 -; MIPS64R6O0-NEXT: or $9, $9, $1 -; MIPS64R6O0-NEXT: sc $9, 0($3) -; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $8, $10, $4 -; MIPS64R6O0-NEXT: seh $8, $8 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seh $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: -; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: -; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: seh $2, $1 -; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: xor $2, $3, $2 -; MIPS64R6O0-NEXT: sltiu $3, $2, 1 +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: seh $1, $1 +; MIPS64R6O0-NEXT: xor $1, $2, $1 +; MIPS64R6O0-NEXT: sltiu $3, $1, 1 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -6620,13 +6627,13 @@ define i32 @CheckSync(i32 signext %v) nounwind noinline { ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $1, %got(countsint)($1) +; MIPS32O0-NEXT: lw $3, %got(countsint)($1) ; MIPS32O0-NEXT: $BB16_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: addu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB16_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB16_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: sync @@ -6675,13 +6682,13 @@ define i32 @CheckSync(i32 signext %v) nounwind noinline { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $1, %got(countsint)($1) +; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1) ; MIPS32R6O0-NEXT: $BB16_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: addu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB16_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB16_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: sync ; MIPS32R6O0-NEXT: jrc $ra @@ -6767,13 +6774,13 @@ define i32 @CheckSync(i32 signext %v) nounwind noinline { ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: ld $1, %got_disp(countsint)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1) ; MIPS64R6O0-NEXT: .LBB16_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: addu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB16_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB16_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra @@ -6918,29 +6925,29 @@ define i32 @zeroreg() nounwind { ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $1, %got(a)($1) -; MIPS32O0-NEXT: addiu $2, $zero, 0 -; MIPS32O0-NEXT: addiu $3, $zero, 1 -; MIPS32O0-NEXT: move $4, $3 +; MIPS32O0-NEXT: lw $4, %got(a)($1) +; MIPS32O0-NEXT: addiu $6, $zero, 0 +; MIPS32O0-NEXT: addiu $2, $zero, 1 +; MIPS32O0-NEXT: move $5, $2 ; MIPS32O0-NEXT: $BB17_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $5, 0($1) -; MIPS32O0-NEXT: bne $5, $4, $BB17_3 +; MIPS32O0-NEXT: ll $1, 0($4) +; MIPS32O0-NEXT: bne $1, $5, $BB17_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS32O0-NEXT: move $6, $2 -; MIPS32O0-NEXT: sc $6, 0($1) -; MIPS32O0-NEXT: beqz $6, $BB17_1 +; MIPS32O0-NEXT: move $3, $6 +; MIPS32O0-NEXT: sc $3, 0($4) +; MIPS32O0-NEXT: beqz $3, $BB17_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB17_3: # %entry -; MIPS32O0-NEXT: xor $1, $5, $3 -; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: xor $2, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $2, 1 ; MIPS32O0-NEXT: sync ; MIPS32O0-NEXT: addiu $2, $zero, 1 -; MIPS32O0-NEXT: xor $2, $5, $2 -; MIPS32O0-NEXT: sltiu $2, $2, 1 -; MIPS32O0-NEXT: andi $2, $2, 1 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: andi $2, $1, 1 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -7001,21 +7008,21 @@ define i32 @zeroreg() nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $1, %got(a)($1) -; MIPS32R6O0-NEXT: addiu $2, $zero, 0 -; MIPS32R6O0-NEXT: addiu $3, $zero, 1 -; MIPS32R6O0-NEXT: move $4, $3 +; MIPS32R6O0-NEXT: lw $4, %got(a)($1) +; MIPS32R6O0-NEXT: addiu $6, $zero, 0 +; MIPS32R6O0-NEXT: addiu $2, $zero, 1 +; MIPS32R6O0-NEXT: move $5, $2 ; MIPS32R6O0-NEXT: $BB17_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $5, 0($1) -; MIPS32R6O0-NEXT: bnec $5, $4, $BB17_3 +; MIPS32R6O0-NEXT: ll $1, 0($4) +; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS32R6O0-NEXT: move $6, $2 -; MIPS32R6O0-NEXT: sc $6, 0($1) -; MIPS32R6O0-NEXT: beqzc $6, $BB17_1 +; MIPS32R6O0-NEXT: move $3, $6 +; MIPS32R6O0-NEXT: sc $3, 0($4) +; MIPS32R6O0-NEXT: beqzc $3, $BB17_1 ; MIPS32R6O0-NEXT: $BB17_3: # %entry -; MIPS32R6O0-NEXT: xor $1, $5, $3 +; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: sync ; MIPS32R6O0-NEXT: jrc $ra @@ -7131,21 +7138,21 @@ define i32 @zeroreg() nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: ld $1, %got_disp(a)($1) -; MIPS64R6O0-NEXT: addiu $2, $zero, 0 -; MIPS64R6O0-NEXT: addiu $3, $zero, 1 -; MIPS64R6O0-NEXT: move $4, $3 +; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($1) +; MIPS64R6O0-NEXT: addiu $6, $zero, 0 +; MIPS64R6O0-NEXT: addiu $2, $zero, 1 +; MIPS64R6O0-NEXT: move $5, $2 ; MIPS64R6O0-NEXT: .LBB17_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $5, 0($1) -; MIPS64R6O0-NEXT: bnec $5, $4, .LBB17_3 +; MIPS64R6O0-NEXT: ll $1, 0($4) +; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS64R6O0-NEXT: move $6, $2 -; MIPS64R6O0-NEXT: sc $6, 0($1) -; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 +; MIPS64R6O0-NEXT: move $3, $6 +; MIPS64R6O0-NEXT: sc $3, 0($4) +; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $1, $5, $3 +; MIPS64R6O0-NEXT: xor $1, $1, $2 ; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra @@ -7316,13 +7323,13 @@ define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(x)($1) -; MIPS32O0-NEXT: addiu $1, $1, 1024 +; MIPS32O0-NEXT: addiu $3, $1, 1024 ; MIPS32O0-NEXT: $BB18_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: addu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB18_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB18_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -7369,13 +7376,13 @@ define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: lw $1, %got(x)($1) -; MIPS32R6O0-NEXT: addiu $1, $1, 1024 +; MIPS32R6O0-NEXT: addiu $3, $1, 1024 ; MIPS32R6O0-NEXT: $BB18_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: addu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB18_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB18_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -7458,13 +7465,13 @@ define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) -; MIPS64R6O0-NEXT: daddiu $1, $1, 1024 +; MIPS64R6O0-NEXT: daddiu $3, $1, 1024 ; MIPS64R6O0-NEXT: .LBB18_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: addu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB18_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll index 5e59246eff5c8..d27c9ac42e059 100644 --- a/llvm/test/CodeGen/Mips/atomic64.ll +++ b/llvm/test/CodeGen/Mips/atomic64.ll @@ -95,13 +95,13 @@ define i64 @AtomicLoadAdd(i64 signext %incr) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB0_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: daddu $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: daddu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -252,13 +252,13 @@ define i64 @AtomicLoadSub(i64 signext %incr) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB1_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: dsubu $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: dsubu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -409,13 +409,13 @@ define i64 @AtomicLoadAnd(i64 signext %incr) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB2_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -566,13 +566,13 @@ define i64 @AtomicLoadOr(i64 signext %incr) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB3_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: or $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -723,13 +723,13 @@ define i64 @AtomicLoadXor(i64 signext %incr) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB4_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: xor $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -884,14 +884,14 @@ define i64 @AtomicLoadNand(i64 signext %incr) nounwind { ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB5_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: nor $3, $zero, $3 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1057,16 +1057,15 @@ define i64 @AtomicSwap64(i64 signext %newval) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ; MIPS64R6O0-NEXT: sd $4, 8($sp) -; MIPS64R6O0-NEXT: ld $2, 8($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $4, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB6_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $3, 0($1) -; MIPS64R6O0-NEXT: move $4, $2 -; MIPS64R6O0-NEXT: scd $4, 0($1) -; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -1253,19 +1252,18 @@ define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind { ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ; MIPS64R6O0-NEXT: sd $5, 8($sp) -; MIPS64R6O0-NEXT: ld $2, 8($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $5, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB7_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $3, 0($1) -; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS64R6O0-NEXT: move $5, $2 -; MIPS64R6O0-NEXT: scd $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ; MIPS64R6O0-NEXT: .LBB7_3: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; diff --git a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll index 64a62c1702818..ce994c2c18a9b 100644 --- a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll +++ b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll @@ -12,18 +12,18 @@ define void @foo(i32 %new, i32 %old) { ; O32-LABEL: foo: ; O32: # %bb.0: # %entry ; O32-NEXT: lui $1, %hi(sym) -; O32-NEXT: lw $1, %lo(sym)($1) +; O32-NEXT: lw $3, %lo(sym)($1) ; O32-NEXT: sync ; O32-NEXT: $BB0_1: # %entry ; O32-NEXT: # =>This Inner Loop Header: Depth=1 -; O32-NEXT: ll $2, 0($1) -; O32-NEXT: bne $2, $4, $BB0_3 +; O32-NEXT: ll $1, 0($3) +; O32-NEXT: bne $1, $4, $BB0_3 ; O32-NEXT: nop ; O32-NEXT: # %bb.2: # %entry ; O32-NEXT: # in Loop: Header=BB0_1 Depth=1 -; O32-NEXT: move $3, $5 -; O32-NEXT: sc $3, 0($1) -; O32-NEXT: beqz $3, $BB0_1 +; O32-NEXT: move $2, $5 +; O32-NEXT: sc $2, 0($3) +; O32-NEXT: beqz $2, $BB0_1 ; O32-NEXT: nop ; O32-NEXT: $BB0_3: # %entry ; O32-NEXT: sync @@ -32,23 +32,23 @@ define void @foo(i32 %new, i32 %old) { ; ; N32-LABEL: foo: ; N32: # %bb.0: # %entry -; N32-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; N32-NEXT: sll $1, $5, 0 -; N32-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; N32-NEXT: sll $2, $4, 0 -; N32-NEXT: lui $3, %hi(sym) -; N32-NEXT: lw $3, %lo(sym)($3) +; N32-NEXT: move $1, $5 +; N32-NEXT: sll $5, $1, 0 +; N32-NEXT: move $1, $4 +; N32-NEXT: sll $4, $1, 0 +; N32-NEXT: lui $1, %hi(sym) +; N32-NEXT: lw $3, %lo(sym)($1) ; N32-NEXT: sync ; N32-NEXT: .LBB0_1: # %entry ; N32-NEXT: # =>This Inner Loop Header: Depth=1 -; N32-NEXT: ll $4, 0($3) -; N32-NEXT: bne $4, $2, .LBB0_3 +; N32-NEXT: ll $1, 0($3) +; N32-NEXT: bne $1, $4, .LBB0_3 ; N32-NEXT: nop ; N32-NEXT: # %bb.2: # %entry ; N32-NEXT: # in Loop: Header=BB0_1 Depth=1 -; N32-NEXT: move $5, $1 -; N32-NEXT: sc $5, 0($3) -; N32-NEXT: beqz $5, .LBB0_1 +; N32-NEXT: move $2, $5 +; N32-NEXT: sc $2, 0($3) +; N32-NEXT: beqz $2, .LBB0_1 ; N32-NEXT: nop ; N32-NEXT: .LBB0_3: # %entry ; N32-NEXT: sync @@ -57,27 +57,27 @@ define void @foo(i32 %new, i32 %old) { ; ; N64-LABEL: foo: ; N64: # %bb.0: # %entry -; N64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; N64-NEXT: sll $1, $5, 0 -; N64-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; N64-NEXT: sll $2, $4, 0 -; N64-NEXT: lui $3, %highest(sym) -; N64-NEXT: daddiu $3, $3, %higher(sym) -; N64-NEXT: dsll $3, $3, 16 -; N64-NEXT: daddiu $3, $3, %hi(sym) -; N64-NEXT: dsll $3, $3, 16 -; N64-NEXT: ld $3, %lo(sym)($3) +; N64-NEXT: move $1, $5 +; N64-NEXT: sll $5, $1, 0 +; N64-NEXT: move $1, $4 +; N64-NEXT: sll $4, $1, 0 +; N64-NEXT: lui $1, %highest(sym) +; N64-NEXT: daddiu $1, $1, %higher(sym) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: daddiu $1, $1, %hi(sym) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: ld $3, %lo(sym)($1) ; N64-NEXT: sync ; N64-NEXT: .LBB0_1: # %entry ; N64-NEXT: # =>This Inner Loop Header: Depth=1 -; N64-NEXT: ll $4, 0($3) -; N64-NEXT: bne $4, $2, .LBB0_3 +; N64-NEXT: ll $1, 0($3) +; N64-NEXT: bne $1, $4, .LBB0_3 ; N64-NEXT: nop ; N64-NEXT: # %bb.2: # %entry ; N64-NEXT: # in Loop: Header=BB0_1 Depth=1 -; N64-NEXT: move $5, $1 -; N64-NEXT: sc $5, 0($3) -; N64-NEXT: beqz $5, .LBB0_1 +; N64-NEXT: move $2, $5 +; N64-NEXT: sc $2, 0($3) +; N64-NEXT: beqz $2, .LBB0_1 ; N64-NEXT: nop ; N64-NEXT: .LBB0_3: # %entry ; N64-NEXT: sync diff --git a/llvm/test/CodeGen/Mips/copy-fp64.ll b/llvm/test/CodeGen/Mips/copy-fp64.ll index 439c788eb9be3..eb096e3787c2c 100644 --- a/llvm/test/CodeGen/Mips/copy-fp64.ll +++ b/llvm/test/CodeGen/Mips/copy-fp64.ll @@ -11,8 +11,8 @@ define double @foo(double %self) { ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $d12_64, $t9, $v0 ; CHECK: renamable $at = ADDu killed $v0, killed $t9 + ; CHECK: renamable $d6_64 = COPY killed $d12_64 ; CHECK: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; CHECK: $d6_64 = COPY killed renamable $d12_64 ; CHECK: renamable $t9 = LW killed renamable $at, target-flags(mips-got) @bar ; CHECK: dead $ra = JALR killed $t9, csr_o32_fp64, target-flags(mips-jalr) , implicit-def dead $ra, implicit killed $d6_64, implicit-def $d0_64 ; CHECK: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll index b9f6568e40c92..9c4d28fa0e471 100644 --- a/llvm/test/CodeGen/Mips/implicit-sret.ll +++ b/llvm/test/CodeGen/Mips/implicit-sret.ll @@ -20,9 +20,8 @@ define internal void @test() unnamed_addr nounwind { ; CHECK-NEXT: ld $5, 16($sp) ; CHECK-NEXT: ld $7, 32($sp) ; CHECK-NEXT: lw $1, 8($sp) -; CHECK-NEXT: # implicit-def: $v0_64 -; CHECK-NEXT: move $2, $1 -; CHECK-NEXT: move $4, $2 +; CHECK-NEXT: # implicit-def: $a0_64 +; CHECK-NEXT: move $4, $1 ; CHECK-NEXT: jal use_sret ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload @@ -41,15 +40,15 @@ start: define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { ; CHECK-LABEL: implicit_sret_impl: ; CHECK: # %bb.0: -; CHECK-NEXT: move $1, $4 -; CHECK-NEXT: daddiu $2, $zero, 20 -; CHECK-NEXT: sd $2, 16($4) -; CHECK-NEXT: daddiu $2, $zero, 0 +; CHECK-NEXT: # kill: def $at_64 killed $a0_64 +; CHECK-NEXT: daddiu $1, $zero, 20 +; CHECK-NEXT: sd $1, 16($4) +; CHECK-NEXT: daddiu $1, $zero, 0 ; CHECK-NEXT: sd $zero, 8($4) -; CHECK-NEXT: daddiu $3, $zero, 30 -; CHECK-NEXT: sd $3, 24($4) -; CHECK-NEXT: addiu $3, $zero, 10 -; CHECK-NEXT: sw $3, 0($4) +; CHECK-NEXT: daddiu $1, $zero, 30 +; CHECK-NEXT: sd $1, 24($4) +; CHECK-NEXT: addiu $1, $zero, 10 +; CHECK-NEXT: sw $1, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } @@ -70,12 +69,10 @@ define internal void @test2() unnamed_addr nounwind { ; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 -; CHECK-NEXT: # implicit-def: $v1_64 -; CHECK-NEXT: move $3, $2 -; CHECK-NEXT: # implicit-def: $v0_64 -; CHECK-NEXT: move $2, $1 -; CHECK-NEXT: move $5, $3 -; CHECK-NEXT: move $6, $2 +; CHECK-NEXT: # implicit-def: $a1_64 +; CHECK-NEXT: move $5, $2 +; CHECK-NEXT: # implicit-def: $a2_64 +; CHECK-NEXT: move $6, $1 ; CHECK-NEXT: jal use_sret2 ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload @@ -95,19 +92,19 @@ start: define internal { i32, i32, i32, i32, i32, i32 } @implicit_sret_impl2() unnamed_addr nounwind { ; CHECK-LABEL: implicit_sret_impl2: ; CHECK: # %bb.0: -; CHECK-NEXT: move $1, $4 -; CHECK-NEXT: addiu $2, $zero, 6 -; CHECK-NEXT: sw $2, 20($4) -; CHECK-NEXT: addiu $2, $zero, 5 -; CHECK-NEXT: sw $2, 16($4) -; CHECK-NEXT: addiu $2, $zero, 4 -; CHECK-NEXT: sw $2, 12($4) -; CHECK-NEXT: addiu $2, $zero, 3 -; CHECK-NEXT: sw $2, 8($4) -; CHECK-NEXT: addiu $2, $zero, 2 -; CHECK-NEXT: sw $2, 4($4) -; CHECK-NEXT: addiu $2, $zero, 1 -; CHECK-NEXT: sw $2, 0($4) +; CHECK-NEXT: # kill: def $at_64 killed $a0_64 +; CHECK-NEXT: addiu $1, $zero, 6 +; CHECK-NEXT: sw $1, 20($4) +; CHECK-NEXT: addiu $1, $zero, 5 +; CHECK-NEXT: sw $1, 16($4) +; CHECK-NEXT: addiu $1, $zero, 4 +; CHECK-NEXT: sw $1, 12($4) +; CHECK-NEXT: addiu $1, $zero, 3 +; CHECK-NEXT: sw $1, 8($4) +; CHECK-NEXT: addiu $1, $zero, 2 +; CHECK-NEXT: sw $1, 4($4) +; CHECK-NEXT: addiu $1, $zero, 1 +; CHECK-NEXT: sw $1, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i32, i32, i32, i32, i32 } { i32 1, i32 2, i32 3, i32 4, i32 5, i32 6 } diff --git a/llvm/test/CodeGen/Mips/micromips-eva.mir b/llvm/test/CodeGen/Mips/micromips-eva.mir index fd30529f7097c..c4d05cf6985e9 100644 --- a/llvm/test/CodeGen/Mips/micromips-eva.mir +++ b/llvm/test/CodeGen/Mips/micromips-eva.mir @@ -196,19 +196,19 @@ body: | ... -# CHECK: 60 41 60 05 lbue $2, 5($1) -# CHECK: 60 41 68 05 lbe $2, 5($1) -# CHECK: 60 41 a8 03 sbe $2, 3($1) +# CHECK: 60 22 60 05 lbue $1, 5($2) +# CHECK: 60 22 68 05 lbe $1, 5($2) +# CHECK: 60 22 a8 03 sbe $1, 3($2) -# CHECK: 60 41 62 0a lhue $2, 10($1) -# CHECK: 60 41 6a 0a lhe $2, 10($1) -# CHECK: 60 41 aa 06 she $2, 6($1) +# CHECK: 60 22 62 0a lhue $1, 10($2) +# CHECK: 60 22 6a 0a lhe $1, 10($2) +# CHECK: 60 22 aa 06 she $1, 6($2) -# CHECK: 60 41 6e 14 lwe $2, 20($1) -# CHECK: 60 41 ae 0c swe $2, 12($1) +# CHECK: 60 22 6e 14 lwe $1, 20($2) +# CHECK: 60 22 ae 0c swe $1, 12($2) -# CHECK: 60 41 6c 00 lle $2, 0($1) -# CHECK: 60 81 ac 00 sce $4, 0($1) +# CHECK: 60 22 6c 00 lle $1, 0($2) +# CHECK: 60 22 ac 00 sce $1, 0($2) # CHECK: 60 41 a6 05 cachee 2, 5($1) # CHECK: 60 41 a4 05 prefe 2, 5($1) diff --git a/llvm/test/CodeGen/Mips/msa/ldr_str.ll b/llvm/test/CodeGen/Mips/msa/ldr_str.ll index 8bebd9481625d..51c8bcd3fdbc5 100644 --- a/llvm/test/CodeGen/Mips/msa/ldr_str.ll +++ b/llvm/test/CodeGen/Mips/msa/ldr_str.ll @@ -11,47 +11,47 @@ define void @llvm_mips_ldr_d_test(<2 x i64>* %val, i8* %ptr) nounwind { ; MIPS32R5-EB-LABEL: llvm_mips_ldr_d_test: ; MIPS32R5-EB: # %bb.0: # %entry -; MIPS32R5-EB-NEXT: # implicit-def: $at -; MIPS32R5-EB-NEXT: lwr $1, 23($5) -; MIPS32R5-EB-NEXT: lwl $1, 20($5) ; MIPS32R5-EB-NEXT: # implicit-def: $v0 -; MIPS32R5-EB-NEXT: lwr $2, 19($5) -; MIPS32R5-EB-NEXT: lwl $2, 16($5) -; MIPS32R5-EB-NEXT: fill.w $w0, $1 -; MIPS32R5-EB-NEXT: insert.w $w0[1], $2 +; MIPS32R5-EB-NEXT: lwr $2, 23($5) +; MIPS32R5-EB-NEXT: lwl $2, 20($5) +; MIPS32R5-EB-NEXT: # implicit-def: $at +; MIPS32R5-EB-NEXT: lwr $1, 19($5) +; MIPS32R5-EB-NEXT: lwl $1, 16($5) +; MIPS32R5-EB-NEXT: fill.w $w0, $2 +; MIPS32R5-EB-NEXT: insert.w $w0[1], $1 ; MIPS32R5-EB-NEXT: st.d $w0, 0($4) ; MIPS32R5-EB-NEXT: jr $ra ; MIPS32R5-EB-NEXT: nop ; ; MIPS32R5-EL-LABEL: llvm_mips_ldr_d_test: ; MIPS32R5-EL: # %bb.0: # %entry -; MIPS32R5-EL-NEXT: # implicit-def: $at -; MIPS32R5-EL-NEXT: lwr $1, 16($5) -; MIPS32R5-EL-NEXT: lwl $1, 19($5) ; MIPS32R5-EL-NEXT: # implicit-def: $v0 -; MIPS32R5-EL-NEXT: lwr $2, 20($5) -; MIPS32R5-EL-NEXT: lwl $2, 23($5) -; MIPS32R5-EL-NEXT: fill.w $w0, $1 -; MIPS32R5-EL-NEXT: insert.w $w0[1], $2 +; MIPS32R5-EL-NEXT: lwr $2, 16($5) +; MIPS32R5-EL-NEXT: lwl $2, 19($5) +; MIPS32R5-EL-NEXT: # implicit-def: $at +; MIPS32R5-EL-NEXT: lwr $1, 20($5) +; MIPS32R5-EL-NEXT: lwl $1, 23($5) +; MIPS32R5-EL-NEXT: fill.w $w0, $2 +; MIPS32R5-EL-NEXT: insert.w $w0[1], $1 ; MIPS32R5-EL-NEXT: st.d $w0, 0($4) ; MIPS32R5-EL-NEXT: jr $ra ; MIPS32R5-EL-NEXT: nop ; ; MIPS32R6-EB-LABEL: llvm_mips_ldr_d_test: ; MIPS32R6-EB: # %bb.0: # %entry -; MIPS32R6-EB-NEXT: lw $1, 20($5) -; MIPS32R6-EB-NEXT: lw $2, 16($5) -; MIPS32R6-EB-NEXT: fill.w $w0, $1 -; MIPS32R6-EB-NEXT: insert.w $w0[1], $2 +; MIPS32R6-EB-NEXT: lw $2, 20($5) +; MIPS32R6-EB-NEXT: lw $1, 16($5) +; MIPS32R6-EB-NEXT: fill.w $w0, $2 +; MIPS32R6-EB-NEXT: insert.w $w0[1], $1 ; MIPS32R6-EB-NEXT: st.d $w0, 0($4) ; MIPS32R6-EB-NEXT: jrc $ra ; ; MIPS32R6-EL-LABEL: llvm_mips_ldr_d_test: ; MIPS32R6-EL: # %bb.0: # %entry -; MIPS32R6-EL-NEXT: lw $1, 16($5) -; MIPS32R6-EL-NEXT: lw $2, 20($5) -; MIPS32R6-EL-NEXT: fill.w $w0, $1 -; MIPS32R6-EL-NEXT: insert.w $w0[1], $2 +; MIPS32R6-EL-NEXT: lw $2, 16($5) +; MIPS32R6-EL-NEXT: lw $1, 20($5) +; MIPS32R6-EL-NEXT: fill.w $w0, $2 +; MIPS32R6-EL-NEXT: insert.w $w0[1], $1 ; MIPS32R6-EL-NEXT: st.d $w0, 0($4) ; MIPS32R6-EL-NEXT: jrc $ra ; @@ -122,43 +122,43 @@ define void @llvm_mips_str_d_test(<2 x i64>* %val, i8* %ptr) nounwind { ; MIPS32R5-EB-LABEL: llvm_mips_str_d_test: ; MIPS32R5-EB: # %bb.0: # %entry ; MIPS32R5-EB-NEXT: ld.d $w0, 0($4) -; MIPS32R5-EB-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5-EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5-EB-NEXT: swr $1, 19($5) -; MIPS32R5-EB-NEXT: swl $1, 16($5) -; MIPS32R5-EB-NEXT: swr $2, 23($5) -; MIPS32R5-EB-NEXT: swl $2, 20($5) +; MIPS32R5-EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5-EB-NEXT: copy_s.w $1, $w0[1] +; MIPS32R5-EB-NEXT: swr $2, 19($5) +; MIPS32R5-EB-NEXT: swl $2, 16($5) +; MIPS32R5-EB-NEXT: swr $1, 23($5) +; MIPS32R5-EB-NEXT: swl $1, 20($5) ; MIPS32R5-EB-NEXT: jr $ra ; MIPS32R5-EB-NEXT: nop ; ; MIPS32R5-EL-LABEL: llvm_mips_str_d_test: ; MIPS32R5-EL: # %bb.0: # %entry ; MIPS32R5-EL-NEXT: ld.d $w0, 0($4) -; MIPS32R5-EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5-EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5-EL-NEXT: swr $1, 16($5) -; MIPS32R5-EL-NEXT: swl $1, 19($5) -; MIPS32R5-EL-NEXT: swr $2, 20($5) -; MIPS32R5-EL-NEXT: swl $2, 23($5) +; MIPS32R5-EL-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5-EL-NEXT: copy_s.w $1, $w0[1] +; MIPS32R5-EL-NEXT: swr $2, 16($5) +; MIPS32R5-EL-NEXT: swl $2, 19($5) +; MIPS32R5-EL-NEXT: swr $1, 20($5) +; MIPS32R5-EL-NEXT: swl $1, 23($5) ; MIPS32R5-EL-NEXT: jr $ra ; MIPS32R5-EL-NEXT: nop ; ; MIPS32R6-EB-LABEL: llvm_mips_str_d_test: ; MIPS32R6-EB: # %bb.0: # %entry ; MIPS32R6-EB-NEXT: ld.d $w0, 0($4) -; MIPS32R6-EB-NEXT: copy_s.w $1, $w0[0] -; MIPS32R6-EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R6-EB-NEXT: sw $1, 20($5) -; MIPS32R6-EB-NEXT: sw $2, 16($5) +; MIPS32R6-EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R6-EB-NEXT: copy_s.w $1, $w0[1] +; MIPS32R6-EB-NEXT: sw $2, 20($5) +; MIPS32R6-EB-NEXT: sw $1, 16($5) ; MIPS32R6-EB-NEXT: jrc $ra ; ; MIPS32R6-EL-LABEL: llvm_mips_str_d_test: ; MIPS32R6-EL: # %bb.0: # %entry ; MIPS32R6-EL-NEXT: ld.d $w0, 0($4) -; MIPS32R6-EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R6-EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R6-EL-NEXT: sw $1, 16($5) -; MIPS32R6-EL-NEXT: sw $2, 20($5) +; MIPS32R6-EL-NEXT: copy_s.w $2, $w0[0] +; MIPS32R6-EL-NEXT: copy_s.w $1, $w0[1] +; MIPS32R6-EL-NEXT: sw $2, 16($5) +; MIPS32R6-EL-NEXT: sw $1, 20($5) ; MIPS32R6-EL-NEXT: jrc $ra ; ; MIPS64R6-LABEL: llvm_mips_str_d_test: diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll index c38f377869f86..2338ca9ded04c 100644 --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -6,27 +6,30 @@ target triple = "powerpc64le-unknown-linux-gnu" define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* nocapture readonly %b) { ; CHECK-LABEL: bn_mul_comba8: ; CHECK: # %bb.0: -; CHECK-NEXT: ld 6, 0(4) -; CHECK-NEXT: ld 7, 0(5) -; CHECK-NEXT: mulhdu 8, 7, 6 -; CHECK-NEXT: ld 4, 8(4) -; CHECK-NEXT: mulld 9, 4, 6 -; CHECK-NEXT: mulhdu 4, 4, 6 -; CHECK-NEXT: addc 6, 9, 8 -; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: std 4, -8(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: ld 3, -8(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 9, 0(3) +; CHECK-NEXT: ld 8, 0(5) +; CHECK-NEXT: mulhdu 7, 8, 9 +; CHECK-NEXT: ld 3, 8(3) +; CHECK-NEXT: mulld 6, 3, 9 +; CHECK-NEXT: mulhdu 3, 3, 9 +; CHECK-NEXT: addc 6, 6, 7 +; CHECK-NEXT: addze 3, 3 ; CHECK-NEXT: ld 5, 8(5) -; CHECK-NEXT: mulld 8, 5, 7 -; CHECK-NEXT: mulhdu 5, 5, 7 -; CHECK-NEXT: addc 6, 6, 8 +; CHECK-NEXT: mulld 7, 5, 8 +; CHECK-NEXT: mulhdu 5, 5, 8 +; CHECK-NEXT: addc 6, 6, 7 ; CHECK-NEXT: addze 5, 5 -; CHECK-NEXT: add 4, 5, 4 -; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 4, 1 -; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x5 -; CHECK-NEXT: mr 5, 4 -; CHECK-NEXT: clrldi 4, 5, 32 -; CHECK-NEXT: std 4, 0(3) +; CHECK-NEXT: add 3, 5, 3 +; CHECK-NEXT: cmpld 7, 3, 5 +; CHECK-NEXT: mfocrf 3, 1 +; CHECK-NEXT: rlwinm 5, 3, 29, 31, 31 +; CHECK-NEXT: # implicit-def: $x3 +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: std 3, 0(4) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 %conv = zext i64 %1 to i128 diff --git a/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll b/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll index 95dd58f513cc7..d55d788665471 100644 --- a/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll +++ b/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll @@ -9,7 +9,7 @@ entry: lnext: %elementArray = load i32*, i32** %elementArrayPtr, align 8 -; CHECK: lwz [[LDREG:[0-9]+]], 124(1) # 4-byte Folded Reload +; CHECK: lwz [[LDREG:[0-9]+]], 140(1) # 4-byte Folded Reload ; CHECK: # implicit-def: $x[[TEMPREG:[0-9]+]] %element = load i32, i32* %elementArray, align 4 ; CHECK: mr [[TEMPREG]], [[LDREG]] diff --git a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py index e04491bff2fb9..85776b7609486 100644 --- a/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py +++ b/llvm/test/CodeGen/PowerPC/aix-overflow-toc.py @@ -28,41 +28,41 @@ print("}") # 32-bit assembly check -# ASM32: lwz 3, L..C0(2) -# ASM32: lwz 3, L..C1(2) +# ASM32: lwz 4, L..C0(2) +# ASM32: lwz 4, L..C1(2) -# ASM32: lwz 3, L..C8191(2) -# ASM32: lwz 3, L..C8192-65536(2) -# ASM32: lwz 3, L..C8193-65536(2) +# ASM32: lwz 4, L..C8191(2) +# ASM32: lwz 4, L..C8192-65536(2) +# ASM32: lwz 4, L..C8193-65536(2) -# ASM32: lwz 3, L..C12288-65536(2) -# ASM32: lwz 3, L..C12289-65536(2) +# ASM32: lwz 4, L..C12288-65536(2) +# ASM32: lwz 4, L..C12289-65536(2) # 64-bit assembly check -# ASM64: ld 3, L..C0(2) -# ASM64: ld 3, L..C1(2) +# ASM64: ld 4, L..C0(2) +# ASM64: ld 4, L..C1(2) -# ASM64: ld 3, L..C4095(2) -# ASM64: ld 3, L..C4096-65536(2) -# ASM64: ld 3, L..C4097-65536(2) +# ASM64: ld 4, L..C4095(2) +# ASM64: ld 4, L..C4096-65536(2) +# ASM64: ld 4, L..C4097-65536(2) -# ASM64: ld 3, L..C12287-65536(2) -# ASM64: ld 3, L..C12288-131072(2) -# ASM64: ld 3, L..C12289-131072(2) +# ASM64: ld 4, L..C12287-65536(2) +# ASM64: ld 4, L..C12288-131072(2) +# ASM64: ld 4, L..C12289-131072(2) -# DIS32: 0: 80 62 00 00 lwz 3, 0(2) +# DIS32: 0: 80 82 00 00 lwz 4, 0(2) # DIS32: 00000002: R_TOC (idx: 24590) a0[TC] -# DIS32: c: 80 62 00 04 lwz 3, 4(2) +# DIS32: c: 80 82 00 04 lwz 4, 4(2) # DIS32: 0000000e: R_TOC (idx: 24592) a1[TC] -# DIS32: fffc: 80 62 7f fc lwz 3, 32764(2) +# DIS32: fffc: 80 82 7f fc lwz 4, 32764(2) # DIS32: 0000fffe: R_TOC (idx: 40972) a8191[TC] -# DIS32: 10004: 80 62 80 00 lwz 3, -32768(2) +# DIS32: 10004: 80 82 80 00 lwz 4, -32768(2) # DIS32: 00010006: R_TOC (idx: 40974) a8192[TC] -# DIS32: 1000c: 80 62 80 04 lwz 3, -32764(2) +# DIS32: 1000c: 80 82 80 04 lwz 4, -32764(2) # DIS32: 0001000e: R_TOC (idx: 40976) a8193[TC] -# DIS32: 18004: 80 62 c0 00 lwz 3, -16384(2) +# DIS32: 18004: 80 82 c0 00 lwz 4, -16384(2) # DIS32: 00018006: R_TOC (idx: 49166) a12288[TC] -# DIS32: 1800c: 80 62 c0 04 lwz 3, -16380(2) +# DIS32: 1800c: 80 82 c0 04 lwz 4, -16380(2) # DIS32: 0001800e: R_TOC (idx: 49168) a12289[TC] diff --git a/llvm/test/CodeGen/PowerPC/anon_aggr.ll b/llvm/test/CodeGen/PowerPC/anon_aggr.ll index 17f4ed46697d6..cc07c4843655a 100644 --- a/llvm/test/CodeGen/PowerPC/anon_aggr.ll +++ b/llvm/test/CodeGen/PowerPC/anon_aggr.ll @@ -19,9 +19,9 @@ unequal: } ; CHECK-LABEL: func1: -; CHECK: cmpld {{([0-9]+,)?}}4, 5 ; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]] +; CHECK: cmpld {{([0-9]+,)?}}4, 5 ; CHECK: ld 3, -[[OFFSET1]](1) ; CHECK: ld 3, -[[OFFSET2]](1) @@ -38,13 +38,13 @@ unequal: ret i8* %array2_ptr } ; CHECK-LABEL: func2: -; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 5 +; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 3 ; CHECK-DAG: std 6, 72(1) ; CHECK-DAG: std 5, 64(1) -; CHECK-DAG: std 5, -[[OFFSET1:[0-9]+]] +; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 3, -[[OFFSET2:[0-9]+]] -; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) +; CHECK: ld 3, -[[OFFSET2]](1) define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) { entry: @@ -85,9 +85,9 @@ unequal: ; CHECK-LABEL: func4: ; CHECK-DAG: ld [[REG2:[0-9]+]], 120(1) ; CHECK-DAG: ld [[REG3:[0-9]+]], 136(1) -; CHECK-DAG: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]] -; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1) +; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]](1) ; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1) +; CHECK: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]] ; CHECK: ld 3, -[[OFFSET1]](1) ; CHECK: ld 3, -[[OFFSET2]](1) diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll index ca25afa458aa7..17617e90a01f4 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -248,8 +248,7 @@ define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly % ; ; CHECK-O0-LABEL: vec_xl_zext: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: lxvrbx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrbx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset @@ -269,8 +268,7 @@ define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture rea ; CHECK-O0-LABEL: vec_xl_zext_short: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 1 -; CHECK-O0-NEXT: lxvrhx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrhx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset @@ -290,8 +288,7 @@ define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture read ; CHECK-O0-LABEL: vec_xl_zext_word: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 2 -; CHECK-O0-NEXT: lxvrwx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrwx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset @@ -311,8 +308,7 @@ define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readon ; CHECK-O0-LABEL: vec_xl_zext_dw: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 3 -; CHECK-O0-NEXT: lxvrdx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrdx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset @@ -334,9 +330,9 @@ define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) { ; CHECK-O0-LABEL: vec_xl_sext_b: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: lbzx r3, r4, r3 -; CHECK-O0-NEXT: extsb r3, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: extsb r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset @@ -358,9 +354,9 @@ define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) { ; CHECK-O0-LABEL: vec_xl_sext_h: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 1 -; CHECK-O0-NEXT: lhax r3, r4, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: lhax r4, r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i16, i16* %p, i64 %offset @@ -382,9 +378,9 @@ define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) { ; CHECK-O0-LABEL: vec_xl_sext_w: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 2 -; CHECK-O0-NEXT: lwax r3, r4, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: lwax r4, r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset @@ -406,9 +402,9 @@ define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) { ; CHECK-O0-LABEL: vec_xl_sext_d: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 3 -; CHECK-O0-NEXT: ldx r3, r4, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: ldx r4, r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset diff --git a/llvm/test/CodeGen/PowerPC/elf-common.ll b/llvm/test/CodeGen/PowerPC/elf-common.ll index cc73d9b58b54a..722b4803ca3a2 100644 --- a/llvm/test/CodeGen/PowerPC/elf-common.ll +++ b/llvm/test/CodeGen/PowerPC/elf-common.ll @@ -6,7 +6,7 @@ ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 < %s | FileCheck -check-prefix=PIC %s -; Test correct code generation for static and pic for loading and storing a common symbol +; Test correct code generation for static and pic for loading and storing a common symbol @comm_glob = common global i32 0, align 4 @@ -14,11 +14,11 @@ define signext i32 @test_comm() nounwind { ; NOOPT-LABEL: test_comm: ; NOOPT: # %bb.0: # %entry ; NOOPT-NEXT: addis 3, 2, comm_glob@toc@ha -; NOOPT-NEXT: addi 3, 3, comm_glob@toc@l -; NOOPT-NEXT: lwz 4, 0(3) -; NOOPT-NEXT: addi 5, 4, 1 -; NOOPT-NEXT: stw 5, 0(3) -; NOOPT-NEXT: extsw 3, 4 +; NOOPT-NEXT: addi 5, 3, comm_glob@toc@l +; NOOPT-NEXT: lwz 3, 0(5) +; NOOPT-NEXT: addi 4, 3, 1 +; NOOPT-NEXT: stw 4, 0(5) +; NOOPT-NEXT: extsw 3, 3 ; NOOPT-NEXT: blr ; ; STATIC-LABEL: test_comm: diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll index 3758f8db10cef..484162d089e58 100644 --- a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll +++ b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll @@ -22,12 +22,12 @@ define internal void @loadFP(double* %d) #0 { ; CHECK-NEXT: paddi r3, 0, .L.str@PCREL, 1 ; CHECK-NEXT: bl printf@notoc ; CHECK-NEXT: ld r4, 104(r1) -; CHECK-NEXT: lis r5, 16403 -; CHECK-NEXT: ori r5, r5, 62914 -; CHECK-NEXT: sldi r5, r5, 32 -; CHECK-NEXT: oris r5, r5, 36700 -; CHECK-NEXT: ori r5, r5, 10486 -; CHECK-NEXT: std r5, 0(r4) +; CHECK-NEXT: lis r3, 16403 +; CHECK-NEXT: ori r3, r3, 62914 +; CHECK-NEXT: sldi r3, r3, 32 +; CHECK-NEXT: oris r3, r3, 36700 +; CHECK-NEXT: ori r3, r3, 10486 +; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -50,16 +50,14 @@ define internal void @loadGV() #0 { ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: stdu r1, -96(r1) ; CHECK-NEXT: paddi r3, 0, .L.str.1@PCREL, 1 ; CHECK-NEXT: bl printf@notoc -; CHECK-NEXT: pld r4, stdout@got@pcrel(0), 1 -; CHECK-NEXT: ld r4, 0(r4) -; CHECK-NEXT: li r5, 97 -; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: mr r3, r5 +; CHECK-NEXT: pld r3, stdout@got@pcrel(0), 1 +; CHECK-NEXT: ld r4, 0(r3) +; CHECK-NEXT: li r3, 97 ; CHECK-NEXT: bl _IO_putc@notoc -; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll index 47c05b56c2fae..b46b1409da7d6 100644 --- a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll @@ -29,8 +29,7 @@ entry: define float @f_i128_fi_nsz(float %v) #0 { ; CHECK-LABEL: f_i128_fi_nsz: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsrdpiz 0, 1 -; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: xsrdpiz 1, 1 ; CHECK-NEXT: blr entry: %a = fptosi float %v to i128 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll index e6bc0f4bd769f..6b6703f0cbbac 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll @@ -12,17 +12,17 @@ define i32 @une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { ; CHECK-NEXT: mfocrf r4, 1 ; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 ; CHECK-NEXT: xori r4, r4, 1 -; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: and r4, r3, r4 ; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 -; CHECK-NEXT: xori r4, r4, 1 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: xscmpudp cr7, f1, f3 ; CHECK-NEXT: mfocrf r5, 1 ; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 ; CHECK-NEXT: xori r5, r5, 1 -; CHECK-NEXT: and r4, r4, r5 -; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: and r3, r3, r5 +; CHECK-NEXT: or r3, r3, r4 ; CHECK-NEXT: # kill: def $r4 killed $r3 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr @@ -42,23 +42,21 @@ define i32 @ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { ; CHECK-NEXT: xscmpudp cr7, f2, f4 ; CHECK-NEXT: mfocrf r4, 1 ; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 -; CHECK-NEXT: and r3, r3, r4 -; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: and r4, r3, r4 ; CHECK-NEXT: xscmpudp cr0, f1, f3 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rotlwi r4, r4, 28 -; CHECK-NEXT: stw r4, -4(r1) -; CHECK-NEXT: mcrf cr7, cr0 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: stw r3, -4(r1) +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: mfocrf r3, 1 ; CHECK-NEXT: lwz r5, -4(r1) ; CHECK-NEXT: rotlwi r5, r5, 4 ; CHECK-NEXT: mtocrf 1, r5 -; CHECK-NEXT: mfocrf r5, 1 -; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 -; CHECK-NEXT: xori r5, r5, 1 -; CHECK-NEXT: and r4, r5, r4 -; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: rlwinm r5, r3, 30, 31, 31 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: and r3, r3, r5 +; CHECK-NEXT: or r3, r3, r4 ; CHECK-NEXT: # kill: def $r4 killed $r3 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr @@ -74,9 +72,8 @@ define i1 @test_f128(fp128 %a, fp128 %b) #0 { ; CHECK-NEXT: xscmpuqp cr7, v2, v3 ; CHECK-NEXT: mfocrf r3, 1 ; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xori r3, r3, 1 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: xori r4, r3, 1 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll index 27c6e71ba803c..627db54ef09fa 100644 --- a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll +++ b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll @@ -9,9 +9,8 @@ define i1 @Test(double %a) { ; CHECK-NEXT: mffprwz 3, 0 ; CHECK-NEXT: xori 3, 3, 65534 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 3, 3, 5 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srwi 4, 3, 5 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll b/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll index 8844f621ee8fd..59311dbb2f5f5 100644 --- a/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll +++ b/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll @@ -145,19 +145,37 @@ entry: } define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_8_1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-NEXT: vinserth 3, 2, 14 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_8_1: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-OPT-NEXT: vinserth 3, 2, 14 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_8_1: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-BE-NEXT: vinserth 3, 2, 0 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_8_1: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-O0-NEXT: vinserth 2, 3, 14 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_8_1: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 0 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_8_1: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 0 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins @@ -166,131 +184,255 @@ entry: ; The following testcases take one halfword element from the first vector and ; inserts it at various locations in the second vector define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_9_7: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-NEXT: vinserth 3, 2, 12 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_9_7: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-OPT-NEXT: vinserth 3, 2, 12 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_9_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-BE-NEXT: vinserth 3, 2, 2 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_9_7: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-O0-NEXT: vinserth 2, 3, 12 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_9_7: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 2 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_9_7: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 2 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_10_4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinserth 3, 2, 10 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_10_4: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vinserth 3, 2, 10 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_10_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-BE-NEXT: vinserth 3, 2, 4 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_10_4: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vinserth 2, 3, 10 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_10_4: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 4 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_10_4: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 4 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_11_2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-NEXT: vinserth 3, 2, 8 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_11_2: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-OPT-NEXT: vinserth 3, 2, 8 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_11_2: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-BE-NEXT: vinserth 3, 2, 6 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_11_2: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-O0-NEXT: vinserth 2, 3, 8 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_11_2: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 6 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_11_2: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 6 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_12_6: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-NEXT: vinserth 3, 2, 6 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_12_6: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-OPT-NEXT: vinserth 3, 2, 6 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_12_6: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-BE-NEXT: vinserth 3, 2, 8 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_12_6: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-O0-NEXT: vinserth 2, 3, 6 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_12_6: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 8 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_12_6: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 8 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_13_3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-NEXT: vinserth 3, 2, 4 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_13_3: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-OPT-NEXT: vinserth 3, 2, 4 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_13_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinserth 3, 2, 10 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_13_3: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-O0-NEXT: vinserth 2, 3, 4 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_13_3: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 10 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_13_3: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vinserth 2, 3, 10 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_14_5: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-NEXT: vinserth 3, 2, 2 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_14_5: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-OPT-NEXT: vinserth 3, 2, 2 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_14_5: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-BE-NEXT: vinserth 3, 2, 12 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_14_5: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-O0-NEXT: vinserth 2, 3, 2 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_14_5: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 12 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_14_5: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 12 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_15_0: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-NEXT: vinserth 3, 2, 0 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_15_0: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-OPT-NEXT: vinserth 3, 2, 0 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_15_0: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-BE-NEXT: vinserth 3, 2, 14 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_15_0: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-O0-NEXT: vinserth 2, 3, 0 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_15_0: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 14 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_15_0: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 14 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins @@ -718,302 +860,588 @@ entry: ; The following testcases take one byte element from the first vector and ; inserts it at various locations in the second vector define <16 x i8> @shuffle_vector_byte_16_8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_16_8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 3, 2, 15 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_16_8: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vinsertb 3, 2, 15 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_16_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 1 -; CHECK-BE-NEXT: vinsertb 3, 2, 0 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_16_8: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_16_8: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 1 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 0 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_16_8: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_17_1(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_17_1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 7 -; CHECK-NEXT: vinsertb 3, 2, 14 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_17_1: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 7 +; CHECK-OPT-NEXT: vinsertb 3, 2, 14 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_17_1: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-BE-NEXT: vinsertb 3, 2, 1 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_17_1: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_17_1: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 1 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_17_1: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_18_10(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_18_10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-NEXT: vinsertb 3, 2, 13 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_18_10: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-OPT-NEXT: vinsertb 3, 2, 13 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_18_10: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 3 -; CHECK-BE-NEXT: vinsertb 3, 2, 2 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_18_10: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_18_10: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 3 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 2 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_18_10: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_19_3(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_19_3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 5 -; CHECK-NEXT: vinsertb 3, 2, 12 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_19_3: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 5 +; CHECK-OPT-NEXT: vinsertb 3, 2, 12 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_19_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-BE-NEXT: vinsertb 3, 2, 3 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_19_3: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_19_3: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 3 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_19_3: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_20_12(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_20_12: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-NEXT: vinsertb 3, 2, 11 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_20_12: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-OPT-NEXT: vinsertb 3, 2, 11 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_20_12: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 5 -; CHECK-BE-NEXT: vinsertb 3, 2, 4 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_20_12: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_20_12: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 5 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 4 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_20_12: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_21_5(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_21_5: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 3 -; CHECK-NEXT: vinsertb 3, 2, 10 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_21_5: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 3 +; CHECK-OPT-NEXT: vinsertb 3, 2, 10 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_21_5: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-BE-NEXT: vinsertb 3, 2, 5 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_21_5: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_21_5: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 5 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_21_5: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_22_14(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_22_14: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-NEXT: vinsertb 3, 2, 9 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_22_14: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-OPT-NEXT: vinsertb 3, 2, 9 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_22_14: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 7 -; CHECK-BE-NEXT: vinsertb 3, 2, 6 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_22_14: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_22_14: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 7 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 6 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_22_14: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_23_7(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_23_7: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 1 -; CHECK-NEXT: vinsertb 3, 2, 8 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_23_7: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 1 +; CHECK-OPT-NEXT: vinsertb 3, 2, 8 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_23_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 3, 2, 7 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_23_7: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_23_7: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 7 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_23_7: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_24_0(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_24_0: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-NEXT: vinsertb 3, 2, 7 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_24_0: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-OPT-NEXT: vinsertb 3, 2, 7 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_24_0: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 9 -; CHECK-BE-NEXT: vinsertb 3, 2, 8 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_24_0: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_24_0: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 8 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_24_0: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_25_9(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_25_9: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 15 -; CHECK-NEXT: vinsertb 3, 2, 6 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_25_9: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 15 +; CHECK-OPT-NEXT: vinsertb 3, 2, 6 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_25_9: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-BE-NEXT: vinsertb 3, 2, 9 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_25_9: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_25_9: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 9 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_25_9: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_26_2(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_26_2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-NEXT: vinsertb 3, 2, 5 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_26_2: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-OPT-NEXT: vinsertb 3, 2, 5 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_26_2: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 11 -; CHECK-BE-NEXT: vinsertb 3, 2, 10 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_26_2: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_26_2: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 11 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 10 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_26_2: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_27_11(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_27_11: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 13 -; CHECK-NEXT: vinsertb 3, 2, 4 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_27_11: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 13 +; CHECK-OPT-NEXT: vinsertb 3, 2, 4 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_27_11: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-BE-NEXT: vinsertb 3, 2, 11 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_27_11: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_27_11: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 11 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_27_11: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_28_4(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_28_4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-NEXT: vinsertb 3, 2, 3 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_28_4: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-OPT-NEXT: vinsertb 3, 2, 3 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_28_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 13 -; CHECK-BE-NEXT: vinsertb 3, 2, 12 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_28_4: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_28_4: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 13 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 12 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_28_4: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_29_13(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_29_13: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 11 -; CHECK-NEXT: vinsertb 3, 2, 2 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_29_13: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 11 +; CHECK-OPT-NEXT: vinsertb 3, 2, 2 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_29_13: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-BE-NEXT: vinsertb 3, 2, 13 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_29_13: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_29_13: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 13 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_29_13: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_30_6(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_30_6: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-NEXT: vinsertb 3, 2, 1 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_30_6: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-OPT-NEXT: vinsertb 3, 2, 1 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_30_6: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 15 -; CHECK-BE-NEXT: vinsertb 3, 2, 14 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_30_6: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_30_6: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 15 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 14 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_30_6: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_31_15(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_31_15: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 9 -; CHECK-NEXT: vinsertb 3, 2, 0 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_31_15: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-OPT-NEXT: vinsertb 3, 2, 0 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_31_15: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-BE-NEXT: vinsertb 3, 2, 15 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_31_15: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_31_15: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 15 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_31_15: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins @@ -1321,8 +1749,8 @@ define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_0: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 14 ; CHECK-O0-NEXT: blr @@ -1335,8 +1763,8 @@ define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_0: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 0 ; CHECK-BE-O0-NEXT: blr @@ -1354,8 +1782,8 @@ define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_1: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 12 ; CHECK-O0-NEXT: blr @@ -1368,8 +1796,8 @@ define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_1: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 2 ; CHECK-BE-O0-NEXT: blr @@ -1387,8 +1815,8 @@ define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 10 ; CHECK-O0-NEXT: blr @@ -1401,8 +1829,8 @@ define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_2: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 4 ; CHECK-BE-O0-NEXT: blr @@ -1420,8 +1848,8 @@ define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_3: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 8 ; CHECK-O0-NEXT: blr @@ -1434,8 +1862,8 @@ define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_3: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 6 ; CHECK-BE-O0-NEXT: blr @@ -1453,8 +1881,8 @@ define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_4: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 6 ; CHECK-O0-NEXT: blr @@ -1467,8 +1895,8 @@ define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_4: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 8 ; CHECK-BE-O0-NEXT: blr @@ -1486,8 +1914,8 @@ define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_5: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 4 ; CHECK-O0-NEXT: blr @@ -1500,8 +1928,8 @@ define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_5: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 10 ; CHECK-BE-O0-NEXT: blr @@ -1519,8 +1947,8 @@ define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_6: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 2 ; CHECK-O0-NEXT: blr @@ -1533,8 +1961,8 @@ define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_6: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 12 ; CHECK-BE-O0-NEXT: blr @@ -1552,8 +1980,8 @@ define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) { ; ; CHECK-O0-LABEL: insert_halfword_7: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 0 ; CHECK-O0-NEXT: blr @@ -1566,8 +1994,8 @@ define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) { ; ; CHECK-BE-O0-LABEL: insert_halfword_7: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 14 ; CHECK-BE-O0-NEXT: blr @@ -1587,8 +2015,8 @@ define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_0: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 15 ; CHECK-O0-NEXT: blr @@ -1601,8 +2029,8 @@ define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_0: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 0 ; CHECK-BE-O0-NEXT: blr @@ -1620,8 +2048,8 @@ define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_1: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 14 ; CHECK-O0-NEXT: blr @@ -1634,8 +2062,8 @@ define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_1: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 1 ; CHECK-BE-O0-NEXT: blr @@ -1653,8 +2081,8 @@ define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 13 ; CHECK-O0-NEXT: blr @@ -1667,8 +2095,8 @@ define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_2: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 2 ; CHECK-BE-O0-NEXT: blr @@ -1686,8 +2114,8 @@ define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_3: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 12 ; CHECK-O0-NEXT: blr @@ -1700,8 +2128,8 @@ define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_3: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 3 ; CHECK-BE-O0-NEXT: blr @@ -1719,8 +2147,8 @@ define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_4: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 11 ; CHECK-O0-NEXT: blr @@ -1733,8 +2161,8 @@ define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_4: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 4 ; CHECK-BE-O0-NEXT: blr @@ -1752,8 +2180,8 @@ define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_5: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 10 ; CHECK-O0-NEXT: blr @@ -1766,8 +2194,8 @@ define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_5: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 5 ; CHECK-BE-O0-NEXT: blr @@ -1785,8 +2213,8 @@ define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_6: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 9 ; CHECK-O0-NEXT: blr @@ -1799,8 +2227,8 @@ define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_6: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 6 ; CHECK-BE-O0-NEXT: blr @@ -1818,8 +2246,8 @@ define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_7: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 8 ; CHECK-O0-NEXT: blr @@ -1832,8 +2260,8 @@ define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_7: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 7 ; CHECK-BE-O0-NEXT: blr @@ -1851,8 +2279,8 @@ define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_8: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 7 ; CHECK-O0-NEXT: blr @@ -1865,8 +2293,8 @@ define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_8: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 8 ; CHECK-BE-O0-NEXT: blr @@ -1884,8 +2312,8 @@ define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_9: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 6 ; CHECK-O0-NEXT: blr @@ -1898,8 +2326,8 @@ define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_9: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 9 ; CHECK-BE-O0-NEXT: blr @@ -1917,8 +2345,8 @@ define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_10: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 5 ; CHECK-O0-NEXT: blr @@ -1931,8 +2359,8 @@ define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_10: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 10 ; CHECK-BE-O0-NEXT: blr @@ -1950,8 +2378,8 @@ define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_11: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 4 ; CHECK-O0-NEXT: blr @@ -1964,8 +2392,8 @@ define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_11: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 11 ; CHECK-BE-O0-NEXT: blr @@ -1983,8 +2411,8 @@ define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_12: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 3 ; CHECK-O0-NEXT: blr @@ -1997,8 +2425,8 @@ define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_12: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 12 ; CHECK-BE-O0-NEXT: blr @@ -2016,8 +2444,8 @@ define <16 x i8> @insert_byte_13(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_13: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 2 ; CHECK-O0-NEXT: blr @@ -2030,8 +2458,8 @@ define <16 x i8> @insert_byte_13(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_13: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 13 ; CHECK-BE-O0-NEXT: blr @@ -2049,8 +2477,8 @@ define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_14: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 1 ; CHECK-O0-NEXT: blr @@ -2063,8 +2491,8 @@ define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_14: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 14 ; CHECK-BE-O0-NEXT: blr @@ -2082,8 +2510,8 @@ define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) { ; ; CHECK-O0-LABEL: insert_byte_15: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 0 ; CHECK-O0-NEXT: blr @@ -2096,8 +2524,8 @@ define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) { ; ; CHECK-BE-O0-LABEL: insert_byte_15: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 15 ; CHECK-BE-O0-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/popcount.ll b/llvm/test/CodeGen/PowerPC/popcount.ll index fb20f1d3ee43b..107ae5484b5b0 100644 --- a/llvm/test/CodeGen/PowerPC/popcount.ll +++ b/llvm/test/CodeGen/PowerPC/popcount.ll @@ -5,11 +5,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount128: ; CHECK: # %bb.0: # %Entry -; CHECK-NEXT: ld 4, 0(3) -; CHECK-NEXT: ld 3, 8(3) -; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: ld 3, 0(4) +; CHECK-NEXT: ld 4, 8(4) ; CHECK-NEXT: popcntd 4, 4 -; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-NEXT: clrldi 3, 3, 56 ; CHECK-NEXT: blr @@ -27,17 +28,18 @@ declare i128 @llvm.ctpop.i128(i128) define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount256: ; CHECK: # %bb.0: # %Entry -; CHECK-NEXT: ld 4, 0(3) -; CHECK-NEXT: ld 5, 8(3) -; CHECK-NEXT: ld 6, 16(3) -; CHECK-NEXT: ld 3, 24(3) -; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: mr 6, 3 +; CHECK-NEXT: ld 3, 0(6) +; CHECK-NEXT: ld 5, 8(6) +; CHECK-NEXT: ld 4, 16(6) +; CHECK-NEXT: ld 6, 24(6) ; CHECK-NEXT: popcntd 6, 6 -; CHECK-NEXT: add 3, 6, 3 -; CHECK-NEXT: popcntd 5, 5 ; CHECK-NEXT: popcntd 4, 4 -; CHECK-NEXT: add 4, 4, 5 -; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: add 4, 4, 6 +; CHECK-NEXT: popcntd 5, 5 +; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: add 3, 3, 5 +; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-NEXT: clrldi 3, 3, 48 ; CHECK-NEXT: blr @@ -57,18 +59,18 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-NEXT: xxlor 0, 34, 34 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 ; CHECK-NEXT: mffprd 3, 0 -; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: popcntd 4, 3 ; CHECK-NEXT: xxswapd 0, 34 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 -; CHECK-NEXT: mffprd 4, 0 -; CHECK-NEXT: popcntd 4, 4 -; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: mffprd 3, 0 +; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: mtfprd 0, 3 -; CHECK-NEXT: # kill: def $vsl0 killed $f0 +; CHECK-NEXT: fmr 1, 0 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: mtfprd 1, 3 -; CHECK-NEXT: # kill: def $vsl1 killed $f1 -; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: mtfprd 0, 3 +; CHECK-NEXT: # kill: def $vsl0 killed $f0 +; CHECK-NEXT: xxmrghd 34, 0, 1 ; CHECK-NEXT: blr Entry: %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0) diff --git a/llvm/test/CodeGen/PowerPC/spill-nor0.ll b/llvm/test/CodeGen/PowerPC/spill-nor0.ll index 4eeb34d0f8995..c9c6651448292 100644 --- a/llvm/test/CodeGen/PowerPC/spill-nor0.ll +++ b/llvm/test/CodeGen/PowerPC/spill-nor0.ll @@ -12,6 +12,12 @@ if.then: ; preds = %entry if.end: ; preds = %entry %0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0 + br i1 undef, label %end0, label %end1 ; need successor blocks to force spill + +end0: + unreachable + +end1: unreachable ; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv diff --git a/llvm/test/CodeGen/PowerPC/spill-nor0.mir b/llvm/test/CodeGen/PowerPC/spill-nor0.mir new file mode 100644 index 0000000000000..2f50ff3701d13 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/spill-nor0.mir @@ -0,0 +1,17 @@ +# RUN: llc -o - %s -mtriple=powerpc64-- -run-pass=regallocfast | FileCheck %s +--- +# CHECK-LABEL: name: func +name: func +tracksRegLiveness: true +body: | + bb.0: + %0 : gprc = LI 42 + %1 : gprc_nor0 = COPY %0 + ; CHECK: STW + + ; Clobber all regs to force a spill + NOP csr_noregs + + ; CHECK: LWZ + NOP implicit %1 +... diff --git a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll index fc939e170ffba..7c6b9eaa6a067 100644 --- a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll +++ b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll @@ -6,11 +6,12 @@ ; CHECK: mflr 0 ; CHECK-NEXT: stw 0, 4(1) ; CHECK-NEXT: lis 0, -2 -; CHECK-NEXT: ori 0, 0, 65488 +; CHECK-NEXT: ori 0, 0, 65504 ; CHECK-NEXT: stwux 1, 1, 0 ; CHECK-NEXT: sub 0, 1, 0 ; CHECK-NEXT: lis 4, __stack_chk_guard@ha -; CHECK-NEXT: lwz 5, __stack_chk_guard@l(4) -; CHECK-NEXT: lis 6, 1 -; CHECK-NEXT: ori 6, 6, 44 -; CHECK-NEXT: stwx 5, 1, 6 +; CHECK-NEXT: stw 4, 16(1) +; CHECK-NEXT: lwz 4, __stack_chk_guard@l(4) +; CHECK-NEXT: lis 5, 1 +; CHECK-NEXT: ori 5, 5, 28 +; CHECK-NEXT: stwx 4, 1, 5 diff --git a/llvm/test/CodeGen/PowerPC/vsx-args.ll b/llvm/test/CodeGen/PowerPC/vsx-args.ll index 3e387d8da7d49..8cd2dbfde2795 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-args.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-args.ll @@ -24,11 +24,14 @@ entry: ; CHECK: blr ; CHECK-FISL-LABEL: @main -; CHECK-FISL: stxvd2x 34 -; CHECK-FISL: vmr 2, 3 -; CHECK-FISL: vmr 3, 4 -; CHECK-FISL: lxvd2x 36 +; CHECK-FISL: stxvd2x 36, 1, 3 +; CHECK-FISL: vmr 4, 3 +; CHECK-FISL: lxvd2x 35, 1, 3 +; CHECK-FISL: 3, 144 +; CHCEK-FISL: stxvd2x 36, 1, 3 +; CHECK-FISL: vmr 4, 2 ; CHECK-FISL: bl sv + ; CHECK-FISL: lxvd2x [[VC:[0-9]+]], ; CHECK-FISL: xvadddp 34, 34, [[VC]] ; CHECK-FISL: blr @@ -36,4 +39,3 @@ entry: attributes #0 = { noinline nounwind readnone } attributes #1 = { nounwind } - diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 4a78218262ca0..6349523bc395f 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -164,8 +164,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-FISL-LABEL: test6: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlxor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlxor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test6: @@ -193,8 +192,7 @@ define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-FISL-LABEL: test7: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlxor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlxor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test7: @@ -250,8 +248,7 @@ define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-FISL-LABEL: test9: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test9: @@ -279,8 +276,7 @@ define <16 x i8> @test10(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-FISL-LABEL: test10: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test10: @@ -336,8 +332,7 @@ define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-FISL-LABEL: test12: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxland vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test12: @@ -365,8 +360,7 @@ define <16 x i8> @test13(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-FISL-LABEL: test13: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxland vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test13: @@ -424,10 +418,8 @@ define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-FISL-LABEL: test15: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v4, v2, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test15: @@ -456,10 +448,8 @@ define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-FISL-LABEL: test16: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v4, v2, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test16: @@ -518,10 +508,8 @@ define <8 x i16> @test18(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-FISL-LABEL: test18: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlnor vs0, v3, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlandc vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlnor v4, v3, v3 +; CHECK-FISL-NEXT: xxlandc v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test18: @@ -550,10 +538,8 @@ define <16 x i8> @test19(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-FISL-LABEL: test19: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlnor vs0, v3, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlandc vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlnor v4, v3, v3 +; CHECK-FISL-NEXT: xxlandc v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test19: @@ -664,10 +650,10 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xvcmpeqsp vs0, v4, v5 ; CHECK-FISL-NEXT: xvcmpeqsp vs1, v5, v5 +; CHECK-FISL-NEXT: xxlnor vs2, vs1, vs1 +; CHECK-FISL-NEXT: xvcmpeqsp vs1, v4, v4 ; CHECK-FISL-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-FISL-NEXT: xvcmpeqsp vs2, v4, v4 -; CHECK-FISL-NEXT: xxlnor vs2, vs2, vs2 -; CHECK-FISL-NEXT: xxlor vs1, vs2, vs1 +; CHECK-FISL-NEXT: xxlor vs1, vs1, vs2 ; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1 ; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr @@ -708,8 +694,8 @@ define <8 x i16> @test23(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) ; CHECK-FISL-LABEL: test23: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: vcmpequh v4, v4, v5 -; CHECK-FISL-NEXT: xxsel vs0, v3, v2, v4 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor vs0, v4, v4 +; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test23: @@ -742,8 +728,8 @@ define <16 x i8> @test24(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) ; CHECK-FISL-LABEL: test24: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: vcmpequb v4, v4, v5 -; CHECK-FISL-NEXT: xxsel vs0, v3, v2, v4 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor vs0, v4, v4 +; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test24: @@ -835,17 +821,16 @@ define <2 x i64> @test26(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: ld r3, -24(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: add r3, r4, r3 +; CHECK-FISL-NEXT: ld r4, -24(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: add r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: ld r3, -32(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: add r3, r4, r3 +; CHECK-FISL-NEXT: ld r4, -32(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: add r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test26: @@ -875,8 +860,7 @@ define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) { ; ; CHECK-FISL-LABEL: test27: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: xxland vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test27: @@ -1010,8 +994,7 @@ define <2 x i64> @test30(<2 x i64>* %a) { ; ; CHECK-FISL-LABEL: test30: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test30: @@ -1129,10 +1112,10 @@ define <4 x float> @test32u(<4 x float>* %a) { ; CHECK-FISL-LABEL: test32u: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: li r4, 15 -; CHECK-FISL-NEXT: lvx v2, r3, r4 -; CHECK-FISL-NEXT: lvsl v3, 0, r3 -; CHECK-FISL-NEXT: lvx v4, 0, r3 -; CHECK-FISL-NEXT: vperm v2, v4, v2, v3 +; CHECK-FISL-NEXT: lvx v3, r3, r4 +; CHECK-FISL-NEXT: lvsl v4, 0, r3 +; CHECK-FISL-NEXT: lvx v2, 0, r3 +; CHECK-FISL-NEXT: vperm v2, v2, v3, v4 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test32u: @@ -1390,10 +1373,10 @@ define <2 x float> @test44(<2 x i64> %a) { ; CHECK-FISL-NEXT: fcfidus f0, f0 ; CHECK-FISL-NEXT: stfs f0, -64(r1) ; CHECK-FISL-NEXT: addi r3, r1, -48 -; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 -; CHECK-FISL-NEXT: addi r3, r1, -64 ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 -; CHECK-FISL-NEXT: vmrghw v2, v3, v2 +; CHECK-FISL-NEXT: addi r3, r1, -64 +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vmrghw v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test44: @@ -1472,10 +1455,10 @@ define <2 x float> @test45(<2 x i64> %a) { ; CHECK-FISL-NEXT: fcfids f0, f0 ; CHECK-FISL-NEXT: stfs f0, -64(r1) ; CHECK-FISL-NEXT: addi r3, r1, -48 -; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 -; CHECK-FISL-NEXT: addi r3, r1, -64 ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 -; CHECK-FISL-NEXT: vmrghw v2, v3, v2 +; CHECK-FISL-NEXT: addi r3, r1, -64 +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vmrghw v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test45: @@ -1548,8 +1531,7 @@ define <2 x i64> @test46(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test46: @@ -1616,8 +1598,7 @@ define <2 x i64> @test47(<2 x float> %a) { ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test47: @@ -1859,17 +1840,16 @@ define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test60: @@ -1925,17 +1905,16 @@ define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test61: @@ -1991,17 +1970,16 @@ define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test62: @@ -2027,7 +2005,6 @@ define double @test63(<2 x double> %a) { ; ; CHECK-FISL-LABEL: test63: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: # kill: def $vf2 killed $vf2 killed $v2 ; CHECK-FISL-NEXT: xxlor f1, v2, v2 ; CHECK-FISL-NEXT: blr ; @@ -2059,7 +2036,6 @@ define double @test64(<2 x double> %a) { ; CHECK-FISL-LABEL: test64: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: xxswapd vs0, v2 -; CHECK-FISL-NEXT: # kill: def $f0 killed $f0 killed $vsl0 ; CHECK-FISL-NEXT: fmr f1, f0 ; CHECK-FISL-NEXT: blr ; @@ -2117,8 +2093,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-LABEL: test66: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: vcmpequw v2, v2, v3 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlnor v2, v2, v2 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test66: @@ -2184,21 +2159,20 @@ define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: ld r3, -24(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: cmpld r4, r3 -; CHECK-FISL-NEXT: li r3, 0 -; CHECK-FISL-NEXT: li r4, -1 -; CHECK-FISL-NEXT: isellt r5, r4, r3 +; CHECK-FISL-NEXT: ld r4, -24(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: cmpld r3, r4 +; CHECK-FISL-NEXT: li r4, 0 +; CHECK-FISL-NEXT: li r3, -1 +; CHECK-FISL-NEXT: isellt r5, r3, r4 ; CHECK-FISL-NEXT: std r5, -8(r1) -; CHECK-FISL-NEXT: ld r5, -32(r1) -; CHECK-FISL-NEXT: ld r6, -48(r1) -; CHECK-FISL-NEXT: cmpld r6, r5 -; CHECK-FISL-NEXT: isellt r3, r4, r3 +; CHECK-FISL-NEXT: ld r6, -32(r1) +; CHECK-FISL-NEXT: ld r5, -48(r1) +; CHECK-FISL-NEXT: cmpld r5, r6 +; CHECK-FISL-NEXT: isellt r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test67: @@ -2284,15 +2258,15 @@ define <2 x double> @test69(<2 x i16> %a) { ; CHECK-FISL-NEXT: addi r3, r3, .LCPI63_0@toc@l ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 ; CHECK-FISL-NEXT: vperm v2, v2, v2, v3 +; CHECK-FISL-NEXT: xxlor vs0, v2, v2 ; CHECK-FISL-NEXT: addi r3, r1, -32 -; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 +; CHECK-FISL-NEXT: stxvd2x vs0, 0, r3 ; CHECK-FISL-NEXT: lha r3, -18(r1) ; CHECK-FISL-NEXT: std r3, -8(r1) ; CHECK-FISL-NEXT: lha r3, -26(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: xvcvsxddp v2, v2 ; CHECK-FISL-NEXT: blr ; @@ -2362,8 +2336,9 @@ define <2 x double> @test70(<2 x i8> %a) { ; CHECK-FISL-NEXT: addi r3, r3, .LCPI64_0@toc@l ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 ; CHECK-FISL-NEXT: vperm v2, v2, v2, v3 +; CHECK-FISL-NEXT: xxlor vs0, v2, v2 ; CHECK-FISL-NEXT: addi r3, r1, -32 -; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 +; CHECK-FISL-NEXT: stxvd2x vs0, 0, r3 ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: extsb r3, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) @@ -2371,8 +2346,7 @@ define <2 x double> @test70(<2 x i8> %a) { ; CHECK-FISL-NEXT: extsb r3, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: xvcvsxddp v2, v2 ; CHECK-FISL-NEXT: blr ; @@ -2494,16 +2468,16 @@ define double @test82(double %a, double %b, double %c, double %d) { ; ; CHECK-FISL-LABEL: test82: ; CHECK-FISL: # %bb.0: # %entry +; CHECK-FISL-NEXT: stfd f2, -16(r1) # 8-byte Folded Spill +; CHECK-FISL-NEXT: fmr f2, f1 ; CHECK-FISL-NEXT: xscmpudp cr0, f3, f4 ; CHECK-FISL-NEXT: stfd f2, -8(r1) # 8-byte Folded Spill -; CHECK-FISL-NEXT: stfd f1, -16(r1) # 8-byte Folded Spill ; CHECK-FISL-NEXT: beq cr0, .LBB67_2 ; CHECK-FISL-NEXT: # %bb.1: # %entry -; CHECK-FISL-NEXT: lfd f0, -8(r1) # 8-byte Folded Reload -; CHECK-FISL-NEXT: stfd f0, -16(r1) # 8-byte Folded Spill -; CHECK-FISL-NEXT: .LBB67_2: # %entry ; CHECK-FISL-NEXT: lfd f0, -16(r1) # 8-byte Folded Reload -; CHECK-FISL-NEXT: fmr f1, f0 +; CHECK-FISL-NEXT: stfd f0, -8(r1) # 8-byte Folded Spill +; CHECK-FISL-NEXT: .LBB67_2: # %entry +; CHECK-FISL-NEXT: lfd f1, -8(r1) # 8-byte Folded Reload ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test82: diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll index 0c402430dadc1..c4ce1cd9fc268 100644 --- a/llvm/test/CodeGen/SPARC/fp16-promote.ll +++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll @@ -124,15 +124,12 @@ define void @test_fptrunc_float(float %f, half* %p) nounwind { ; ; V8-UNOPT-LABEL: test_fptrunc_float: ; V8-UNOPT: ! %bb.0: -; V8-UNOPT-NEXT: save %sp, -104, %sp -; V8-UNOPT-NEXT: st %i0, [%fp+-4] -; V8-UNOPT-NEXT: ld [%fp+-4], %f0 +; V8-UNOPT-NEXT: save %sp, -96, %sp ; V8-UNOPT-NEXT: mov %i0, %o0 -; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill +; V8-UNOPT-NEXT: st %o0, [%fp+-4] ; V8-UNOPT-NEXT: call __gnu_f2h_ieee -; V8-UNOPT-NEXT: st %f0, [%fp+-12] -; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: sth %o0, [%i0] +; V8-UNOPT-NEXT: ld [%fp+-4], %f0 +; V8-UNOPT-NEXT: sth %o0, [%i1] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore ; @@ -176,21 +173,19 @@ define void @test_fptrunc_double(double %d, half* %p) nounwind { ; V8-UNOPT-LABEL: test_fptrunc_double: ; V8-UNOPT: ! %bb.0: ; V8-UNOPT-NEXT: save %sp, -112, %sp -; V8-UNOPT-NEXT: ! implicit-def: $i4_i5 +; V8-UNOPT-NEXT: mov %i1, %i3 ; V8-UNOPT-NEXT: mov %i0, %i4 -; V8-UNOPT-NEXT: mov %i1, %i5 -; V8-UNOPT-NEXT: std %i4, [%fp+-8] +; V8-UNOPT-NEXT: ! implicit-def: $i0_i1 +; V8-UNOPT-NEXT: mov %i4, %i0 +; V8-UNOPT-NEXT: mov %i3, %i1 +; V8-UNOPT-NEXT: std %i0, [%fp+-8] ; V8-UNOPT-NEXT: ldd [%fp+-8], %f0 ; V8-UNOPT-NEXT: std %f0, [%fp+-16] ; V8-UNOPT-NEXT: ldd [%fp+-16], %i0 -; V8-UNOPT-NEXT: mov %i0, %i3 -; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 -; V8-UNOPT-NEXT: mov %i3, %o0 -; V8-UNOPT-NEXT: mov %i1, %o1 +; V8-UNOPT-NEXT: mov %i0, %o0 ; V8-UNOPT-NEXT: call __truncdfhf2 -; V8-UNOPT-NEXT: st %i2, [%fp+-20] -; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: sth %o0, [%i0] +; V8-UNOPT-NEXT: mov %i1, %o1 +; V8-UNOPT-NEXT: sth %o0, [%i2] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore ; @@ -241,21 +236,18 @@ define void @test_fadd(half* %p, half* %q) nounwind { ; ; V8-UNOPT-LABEL: test_fadd: ; V8-UNOPT: ! %bb.0: -; V8-UNOPT-NEXT: save %sp, -112, %sp -; V8-UNOPT-NEXT: lduh [%i0], %o0 -; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill +; V8-UNOPT-NEXT: save %sp, -104, %sp ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %i0, [%fp+-12] -; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: lduh [%i0], %o0 +; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %f0, [%fp+-16] -; V8-UNOPT-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: fadds %f1, %f0, %f0 +; V8-UNOPT-NEXT: lduh [%i1], %o0 +; V8-UNOPT-NEXT: fmovs %f0, %f1 +; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload +; V8-UNOPT-NEXT: fadds %f0, %f1, %f0 ; V8-UNOPT-NEXT: st %f0, [%fp+-4] ; V8-UNOPT-NEXT: call __gnu_f2h_ieee ; V8-UNOPT-NEXT: ld [%fp+-4], %o0 -; V8-UNOPT-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: sth %o0, [%i0] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore @@ -318,21 +310,18 @@ define void @test_fmul(half* %p, half* %q) nounwind { ; ; V8-UNOPT-LABEL: test_fmul: ; V8-UNOPT: ! %bb.0: -; V8-UNOPT-NEXT: save %sp, -112, %sp -; V8-UNOPT-NEXT: lduh [%i0], %o0 -; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill +; V8-UNOPT-NEXT: save %sp, -104, %sp ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %i0, [%fp+-12] -; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: lduh [%i0], %o0 +; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %f0, [%fp+-16] -; V8-UNOPT-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: fmuls %f1, %f0, %f0 +; V8-UNOPT-NEXT: lduh [%i1], %o0 +; V8-UNOPT-NEXT: fmovs %f0, %f1 +; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload +; V8-UNOPT-NEXT: fmuls %f0, %f1, %f0 ; V8-UNOPT-NEXT: st %f0, [%fp+-4] ; V8-UNOPT-NEXT: call __gnu_f2h_ieee ; V8-UNOPT-NEXT: ld [%fp+-4], %o0 -; V8-UNOPT-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: sth %o0, [%i0] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore diff --git a/llvm/test/CodeGen/SystemZ/swift-return.ll b/llvm/test/CodeGen/SystemZ/swift-return.ll index 84e257f93218f..4bbdbcffd5271 100644 --- a/llvm/test/CodeGen/SystemZ/swift-return.ll +++ b/llvm/test/CodeGen/SystemZ/swift-return.ll @@ -14,10 +14,9 @@ ; CHECK-O0-LABEL: test ; CHECK-O0: st %r2 ; CHECK-O0: brasl %r14, gen -; CHECK-O0-DAG: lhr %[[REG1:r[0-9]+]], %r2 +; CHECK-O0-DAG: lhr %r2, %r2 ; CHECK-O0-DAG: lbr %[[REG2:r[0-9]+]], %r3 -; CHECK-O0: ar %[[REG1]], %[[REG2]] -; CHECK-O0: lr %r2, %[[REG1]] +; CHECK-O0: ar %r2, %[[REG2]] define i16 @test(i32 %key) { entry: %key.addr = alloca i32, align 4 @@ -61,7 +60,6 @@ declare swiftcc { i16, i8 } @gen(i32) ; CHECK-O0: ar ; CHECK-O0: ar ; CHECK-O0: ar -; CHECK-O0: lr %r2 define i32 @test2(i32 %key) #0 { entry: %key.addr = alloca i32, align 4 diff --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll index 76b1e02ebe224..d8fe608582c91 100644 --- a/llvm/test/CodeGen/SystemZ/swifterror.ll +++ b/llvm/test/CodeGen/SystemZ/swifterror.ll @@ -16,7 +16,7 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-O0-LABEL: foo: ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr %r0, %r2 +; CHECK-O0: lgr [[T0:%r[0-9]+]], %r2 ; CHECK-O0: mvi 8(%r2), 1 entry: %call = call i8* @malloc(i64 16) @@ -118,19 +118,17 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { ; CHECK-NOT: %r9 ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: chi %r2, 0 ; spill to stack ; CHECK-O0: stg %r9, [[OFFS:[0-9]+]](%r15) +; CHECK-O0: chi %r2, 0 ; CHECK-O0: je ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-O0: lgr %r9, %r2 ; CHECK-O0: mvi 8(%r2), 1 -; CHECK-O0: lgr %r9, %r[[REG1]] ; CHECK-O0: br %r14 ; reload from stack -; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS]](%r15) -; CHECK-O0: lgr %r9, %r[[REG2]] +; CHECK-O0: lg %r9, [[OFFS]](%r15) ; CHECK-O0: br %r14 entry: %cond = icmp ne i32 %cc, 0 @@ -169,11 +167,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc ; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 -; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0: mvi 8(%r[[REG1]]), 1 ; CHECK-O0: jnh ; reload from stack -; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS:[0-9]+]](%r15) -; CHECK-O0: lgr %r9, %r[[REG2]] +; CHECK-O0: lg %r9, [[OFFS:[0-9]+]](%r15) ; CHECK-O0: br %r14 entry: br label %bb_loop @@ -214,18 +211,17 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: lghi %r{{.*}}, 16 ; spill sret to stack -; CHECK-O0: stg %r2, [[OFFS1:[0-9]+]](%r15) -; CHECK-O0: lgr %r2, %r{{.*}} -; CHECK-O0: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0-DAG: stg %r2, [[OFFS1:[0-9]+]](%r15) +; CHECK-O0-DAG: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr {{.*}}, %r2 -; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0-DAG: lgr %r[[REG3:[0-9]+]], %r2 +; CHECK-O0-DAG: mvi 8(%r[[REG3]]), 1 ; CHECK-O0-DAG: lg %r[[REG1:[0-9]+]], [[OFFS1]](%r15) +; CHECK-O0-DAG: lgr %r9, %r[[REG3]] ; CHECK-O0-DAG: l %r[[REG2:[0-9]+]], [[OFFS2]](%r15) ; CHECK-O0: st %r[[REG2]], 4(%r[[REG1]]) -; CHECK-O0: lgr %r9, {{.*}} ; CHECK-O0: br %r14 entry: %call = call i8* @malloc(i64 16) @@ -255,8 +251,6 @@ define float @caller3(i8* %error_ref) { ; CHECK-O0-LABEL: caller3: ; CHECK-O0: lghi %r9, 0 ; CHECK-O0: lhi %r3, 1 -; CHECK-O0: stg %r2, {{.*}}(%r15) -; CHECK-O0: lgr %r2, {{.*}} ; CHECK-O0: brasl %r14, foo_sret ; CHECK-O0: lgr {{.*}}, %r9 ; CHECK-O0: cghi %r9, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll index 38a7e1dbba193..94a1ec9380fb2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll @@ -404,7 +404,7 @@ for.cond.cleanup: } ; CHECK-MID: check_negated_xor_wls -; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3 +; CHECK-MID: t2WhileLoopStart renamable $r2, %bb.3 ; CHECK-MID: tB %bb.1 ; CHECK-MID: bb.1.while.body.preheader: ; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1 @@ -437,7 +437,7 @@ while.end: } ; CHECK-MID: check_negated_cmp_wls -; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3 +; CHECK-MID: t2WhileLoopStart renamable $r2, %bb.3 ; CHECK-MID: tB %bb.1 ; CHECK-MID: bb.1.while.body.preheader: ; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1 diff --git a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir index ace7a38ec10b1..4e2197c0f0f02 100644 --- a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir +++ b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir @@ -38,10 +38,8 @@ body: | bb.0.entry: ; CHECK-LABEL: name: constraint_h ; CHECK: renamable $r0 = tLDRspi %stack.0.i, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i) - ; CHECK: renamable $r12 = COPY killed renamable $r0 - ; CHECK: t2STRi12 killed $r12, %stack.1, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: $r8 = t2LDRi12 %stack.1, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, renamable $r8, 12 /* clobber */, implicit-def early-clobber $r12 + ; CHECK: renamable $r8 = COPY killed renamable $r0 + ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, killed renamable $r8, 12 /* clobber */, implicit-def dead early-clobber $r12 ; CHECK: tBX_RET 14 /* CC::al */, $noreg %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) %0:hgpr = COPY %1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll b/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll index 3a33825a0b0de..647ad2e8182e8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll @@ -10,13 +10,11 @@ define arm_aapcs_vfpcc void @spill_vector_i32(<4 x i32> %v, <4 x i32>* %p) { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -33,13 +31,11 @@ define arm_aapcs_vfpcc void @spill_vector_i16(<8 x i16> %v, <8 x i16>* %p) { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -56,13 +52,11 @@ define arm_aapcs_vfpcc void @spill_vector_i8(<16 x i8> %v, <16 x i8>* %p) { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -79,13 +73,11 @@ define arm_aapcs_vfpcc void @spill_vector_i64(<2 x i64> %v, <2 x i64>* %p) { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -102,13 +94,11 @@ define arm_aapcs_vfpcc void @spill_vector_f32(<4 x float> %v, <4 x float>* %p) { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -125,13 +115,11 @@ define arm_aapcs_vfpcc void @spill_vector_f16(<8 x half> %v, <8 x half>* %p) { ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -146,15 +134,15 @@ define arm_aapcs_vfpcc void @spill_vector_f64(<2 x double> %v, <2 x double>* %p) ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: .pad #40 +; CHECK-NEXT: sub sp, #40 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: bl external_function -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} entry: call void @external_function() diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll index b5635c7e0f067..3f4af707ff820 100644 --- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -20,18 +20,15 @@ define i32 @z() nounwind ssp { ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl $8, %ecx -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: addl $36, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: addl $36, %edi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movb %cl, 32(%eax) @@ -42,11 +39,11 @@ define i32 @z() nounwind ssp { ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: ## %bb.1: ## %return ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %ecx -; CHECK-NEXT: movl (%ecx), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: cmpl %edx, %ecx ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.2: ## %SP_return ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload diff --git a/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll b/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll index 6a43e864e965d..54540f3e65389 100644 --- a/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll +++ b/llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll @@ -10,12 +10,8 @@ target triple = "i386-apple-darwin10" define i32 @func(i8* %s) nounwind ssp { ; CHECK-LABEL: func: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: pushl %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill -; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: arg0 %eax ; CHECK-NEXT: arg1 %ecx @@ -23,10 +19,7 @@ define i32 @func(i8* %s) nounwind ssp { ; CHECK-NEXT: arg3 %esi ; CHECK-NEXT: arg4 %ecx ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: movl %ecx, %edi -; CHECK-NEXT: addl $4, %esp ; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl entry: %0 = tail call %asmtype asm "arg0 $0\0A\09arg1 $1\0A\09arg2 $2\0A\09arg3 $3\0A\09arg4 $4", "={ax},=r,=r,=r,1,~{dirflag},~{fpsr},~{flags}"(i8* %s) nounwind, !srcloc !0 ; <%0> [#uses=1] diff --git a/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll index 5ef867d4f9dcf..70228bb47f4d8 100644 --- a/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll +++ b/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll @@ -15,20 +15,20 @@ ; CHECK-LABEL: @test_bitcast ; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx ; Load the function pointer. ; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Load the third argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK: movq %rdx, %rdi +; Load the second argument +; CHECK: movq %rdx, %rsi ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret @@ -54,20 +54,20 @@ label_end: ; CHECK-LABEL: @test_inttoptr ; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] -; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] ; Spill %loaded_ptr. +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] +; Spill %arg2. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx ; Load the function pointer. ; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Load the third argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK: movq %rdx, %rdi +; Load the second argument +; CHECK: movq %rdx, %rsi ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret @@ -92,21 +92,21 @@ label_end: } ; CHECK-LABEL: @test_ptrtoint -; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx ; Load the function pointer. ; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Load the third argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK: movq %rdx, %rdi +; Load the second argument +; CHECK: movq %rdx, %rsi ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret diff --git a/llvm/test/CodeGen/X86/atomic-monotonic.ll b/llvm/test/CodeGen/X86/atomic-monotonic.ll index 9cab2d7d2b256..b1eecdfdc0b2c 100644 --- a/llvm/test/CodeGen/X86/atomic-monotonic.ll +++ b/llvm/test/CodeGen/X86/atomic-monotonic.ll @@ -14,8 +14,8 @@ define i8 @load_i8(i8* %ptr) { define void @store_i8(i8* %ptr, i8 %v) { ; CHECK-O0-LABEL: store_i8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $esi -; CHECK-O0-NEXT: movb %sil, (%rdi) +; CHECK-O0-NEXT: movb %sil, %al +; CHECK-O0-NEXT: movb %al, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i8: @@ -44,8 +44,8 @@ define i16 @load_i16(i16* %ptr) { define void @store_i16(i16* %ptr, i16 %v) { ; CHECK-O0-LABEL: store_i16: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi -; CHECK-O0-NEXT: movw %si, (%rdi) +; CHECK-O0-NEXT: movw %si, %ax +; CHECK-O0-NEXT: movw %ax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i16: diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 7a1f34c65c183..7b255c7b6c1ae 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -16,8 +16,8 @@ define i8 @load_i8(i8* %ptr) { define void @store_i8(i8* %ptr, i8 %v) { ; CHECK-O0-LABEL: store_i8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $esi -; CHECK-O0-NEXT: movb %sil, (%rdi) +; CHECK-O0-NEXT: movb %sil, %al +; CHECK-O0-NEXT: movb %al, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i8: @@ -46,8 +46,8 @@ define i16 @load_i16(i16* %ptr) { define void @store_i16(i16* %ptr, i16 %v) { ; CHECK-O0-LABEL: store_i16: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi -; CHECK-O0-NEXT: movw %si, (%rdi) +; CHECK-O0-NEXT: movw %si, %ax +; CHECK-O0-NEXT: movw %ax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i16: @@ -231,11 +231,10 @@ define i128 @load_i128(i128* %ptr) { ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; CHECK-O0-NEXT: movl %eax, %ebx +; CHECK-O0-NEXT: movq %rbx, %rax +; CHECK-O0-NEXT: movq %rbx, %rdx +; CHECK-O0-NEXT: movq %rbx, %rcx ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O0-NEXT: popq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -264,24 +263,24 @@ define void @store_i128(i128* %ptr, i128 %v) { ; CHECK-O0-NEXT: pushq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movq 8(%rdi), %rcx ; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq (%rdi), %rax +; CHECK-O0-NEXT: movq 8(%rdi), %rdx ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: jmp .LBB16_1 ; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start ; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; CHECK-O0-NEXT: lock cmpxchg16b (%rsi) -; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: jne .LBB16_1 ; CHECK-O0-NEXT: jmp .LBB16_2 ; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end @@ -317,24 +316,22 @@ define i256 @load_i256(i256* %ptr) { ; CHECK-O0-NEXT: subq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-O0-NEXT: movq %rdi, %rax -; CHECK-O0-NEXT: movl $32, %ecx -; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; CHECK-O0-NEXT: xorl %r8d, %r8d -; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq %rcx, %rdi -; CHECK-O0-NEXT: movl %r8d, %ecx ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movl $32, %edi +; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: callq __atomic_load -; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; CHECK-O0-NEXT: movq %rsi, 24(%rdi) -; CHECK-O0-NEXT: movq %rdx, 16(%rdi) -; CHECK-O0-NEXT: movq %rcx, 8(%rdi) -; CHECK-O0-NEXT: movq %rax, (%rdi) -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; CHECK-O0-NEXT: movq %r8, 24(%rdi) +; CHECK-O0-NEXT: movq %rsi, 16(%rdi) +; CHECK-O0-NEXT: movq %rdx, 8(%rdi) +; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -369,18 +366,18 @@ define void @store_i256(i256* %ptr, i256 %v) { ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: subq $40, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 -; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %r9 -; CHECK-O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movq %rcx, %rax +; CHECK-O0-NEXT: movq %rdx, (%rsp) # 8-byte Spill +; CHECK-O0-NEXT: movq %rsi, %r9 +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload +; CHECK-O0-NEXT: xorl %ecx, %ecx +; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-O0-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp) -; CHECK-O0-NEXT: movl $32, %ecx -; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill -; CHECK-O0-NEXT: movq %rcx, %rdi -; CHECK-O0-NEXT: movq (%rsp), %rsi # 8-byte Reload -; CHECK-O0-NEXT: movq %r9, %rdx -; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movl $32, %edi ; CHECK-O0-NEXT: callq __atomic_store ; CHECK-O0-NEXT: addq $40, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -411,10 +408,10 @@ define void @store_i256(i256* %ptr, i256 %v) { define void @vec_store(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: vec_store: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax -; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) -; CHECK-O0-CUR-NEXT: movl %ecx, 4(%rdi) +; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx +; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax +; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) +; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: vec_store: @@ -448,10 +445,10 @@ define void @vec_store(i32* %p0, <2 x i32> %vec) { define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: vec_store_unaligned: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax -; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) -; CHECK-O0-CUR-NEXT: movl %ecx, 4(%rdi) +; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx +; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax +; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) +; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: vec_store_unaligned: @@ -594,17 +591,11 @@ define i64 @load_fold_add1(i64* %p) { } define i64 @load_fold_add2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_add2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: addq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_add2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: addq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_add2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: addq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, %v2 ret i64 %ret @@ -694,17 +685,11 @@ define i64 @load_fold_mul1(i64* %p) { } define i64 @load_fold_mul2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_mul2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: imulq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_mul2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: imulq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_mul2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: imulq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, %v2 ret i64 %ret @@ -1129,8 +1114,8 @@ define i64 @load_fold_shl1(i64* %p) { define i64 @load_fold_shl2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_shl2: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rsi, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shlq %cl, %rax ; CHECK-O0-NEXT: retq @@ -1179,8 +1164,8 @@ define i64 @load_fold_lshr1(i64* %p) { define i64 @load_fold_lshr2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_lshr2: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rsi, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shrq %cl, %rax ; CHECK-O0-NEXT: retq @@ -1229,8 +1214,8 @@ define i64 @load_fold_ashr1(i64* %p) { define i64 @load_fold_ashr2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_ashr2: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rsi, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: sarq %cl, %rax ; CHECK-O0-NEXT: retq @@ -1283,17 +1268,11 @@ define i64 @load_fold_and1(i64* %p) { } define i64 @load_fold_and2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_and2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: andq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_and2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: andq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_and2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: andq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, %v2 ret i64 %ret @@ -1336,17 +1315,11 @@ define i64 @load_fold_or1(i64* %p) { } define i64 @load_fold_or2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_or2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: orq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_or2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: orq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_or2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: orq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret @@ -1389,17 +1362,11 @@ define i64 @load_fold_xor1(i64* %p) { } define i64 @load_fold_xor2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_xor2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: xorq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_xor2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: xorq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_xor2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: xorq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, %v2 ret i64 %ret @@ -1434,9 +1401,7 @@ define i1 @load_fold_icmp1(i64* %p) { ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq $15, %rax -; CHECK-O0-NEXT: sete %cl -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_icmp1: @@ -1454,9 +1419,7 @@ define i1 @load_fold_icmp2(i64* %p, i64 %v2) { ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq %rsi, %rax -; CHECK-O0-NEXT: sete %cl -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_icmp2: @@ -1475,9 +1438,7 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax -; CHECK-O0-NEXT: sete %cl -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_icmp3: @@ -1620,17 +1581,17 @@ define void @rmw_fold_mul2(i64* %p, i64 %v) { define void @rmw_fold_sdiv1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_sdiv1: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: imulq %rcx -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-O0-NEXT: addq %rax, %rdx -; CHECK-O0-NEXT: movq %rdx, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rcx +; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-O0-NEXT: movq %rcx, %rax +; CHECK-O0-NEXT: imulq %rdx +; CHECK-O0-NEXT: movq %rdx, %rax +; CHECK-O0-NEXT: addq %rcx, %rax +; CHECK-O0-NEXT: movq %rax, %rcx ; CHECK-O0-NEXT: shrq $63, %rcx -; CHECK-O0-NEXT: sarq $3, %rdx -; CHECK-O0-NEXT: addq %rcx, %rdx -; CHECK-O0-NEXT: movq %rdx, (%rdi) +; CHECK-O0-NEXT: sarq $3, %rax +; CHECK-O0-NEXT: addq %rcx, %rax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_sdiv1: @@ -1761,16 +1722,17 @@ define void @rmw_fold_srem1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_srem1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: imulq %rcx ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-O0-NEXT: addq %rax, %rdx ; CHECK-O0-NEXT: movq %rdx, %rcx -; CHECK-O0-NEXT: shrq $63, %rcx -; CHECK-O0-NEXT: sarq $3, %rdx -; CHECK-O0-NEXT: addq %rcx, %rdx -; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rcx +; CHECK-O0-NEXT: addq %rax, %rcx +; CHECK-O0-NEXT: movq %rcx, %rdx +; CHECK-O0-NEXT: shrq $63, %rdx +; CHECK-O0-NEXT: sarq $3, %rcx +; CHECK-O0-NEXT: addq %rdx, %rcx +; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx ; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) @@ -1932,9 +1894,9 @@ define void @rmw_fold_shl2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_shl2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi +; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx -; CHECK-O0-NEXT: movb %sil, %cl +; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq @@ -1988,9 +1950,9 @@ define void @rmw_fold_lshr2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_lshr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi +; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx -; CHECK-O0-NEXT: movb %sil, %cl +; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq @@ -2044,9 +2006,9 @@ define void @rmw_fold_ashr2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_ashr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi +; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx -; CHECK-O0-NEXT: movb %sil, %cl +; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq @@ -2268,12 +2230,12 @@ define i32 @fold_trunc_or(i64* %p, i32 %v2) { define i32 @split_load(i64* %p) { ; CHECK-O0-LABEL: split_load: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movb %al, %cl -; CHECK-O0-NEXT: shrq $32, %rax -; CHECK-O0-NEXT: # kill: def $al killed $al killed $rax -; CHECK-O0-NEXT: orb %al, %cl -; CHECK-O0-NEXT: movzbl %cl, %eax +; CHECK-O0-NEXT: movq (%rdi), %rcx +; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: shrq $32, %rcx +; CHECK-O0-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-O0-NEXT: orb %cl, %al +; CHECK-O0-NEXT: movzbl %al, %eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: split_load: @@ -2411,8 +2373,8 @@ define i64 @nofold_stfence(i64* %p) { define i64 @fold_constant(i64 %arg) { ; CHECK-O0-LABEL: fold_constant: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: addq Constant, %rdi ; CHECK-O0-NEXT: movq %rdi, %rax +; CHECK-O0-NEXT: addq Constant, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: fold_constant: @@ -2602,10 +2564,9 @@ define i32 @load_i8_anyext_i32(i8* %ptr) { define i32 @load_i16_anyext_i32(i16* %ptr) { ; CHECK-O0-CUR-LABEL: load_i16_anyext_i32: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: movw (%rdi), %ax -; CHECK-O0-CUR-NEXT: # implicit-def: $ecx -; CHECK-O0-CUR-NEXT: movw %ax, %cx -; CHECK-O0-CUR-NEXT: movl %ecx, %eax +; CHECK-O0-CUR-NEXT: movw (%rdi), %cx +; CHECK-O0-CUR-NEXT: # implicit-def: $eax +; CHECK-O0-CUR-NEXT: movw %cx, %ax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i16_anyext_i32: @@ -2633,10 +2594,10 @@ define i32 @load_i16_anyext_i32(i16* %ptr) { define i64 @load_i16_anyext_i64(i16* %ptr) { ; CHECK-O0-CUR-LABEL: load_i16_anyext_i64: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: movw (%rdi), %ax -; CHECK-O0-CUR-NEXT: # implicit-def: $ecx -; CHECK-O0-CUR-NEXT: movw %ax, %cx -; CHECK-O0-CUR-NEXT: vmovd %ecx, %xmm0 +; CHECK-O0-CUR-NEXT: movw (%rdi), %cx +; CHECK-O0-CUR-NEXT: # implicit-def: $eax +; CHECK-O0-CUR-NEXT: movw %cx, %ax +; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0 ; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax ; CHECK-O0-CUR-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll index 05a10966a4f1a..022aa38a4554c 100644 --- a/llvm/test/CodeGen/X86/atomic32.ll +++ b/llvm/test/CodeGen/X86/atomic32.ll @@ -71,9 +71,8 @@ define void @atomic_fetch_and32() nounwind { ; X64-NEXT: andl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -95,10 +94,9 @@ define void @atomic_fetch_and32() nounwind { ; X86-NEXT: andl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB2_2 ; X86-NEXT: jmp .LBB2_1 ; X86-NEXT: .LBB2_2: # %atomicrmw.end @@ -125,9 +123,8 @@ define void @atomic_fetch_or32() nounwind { ; X64-NEXT: orl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: jmp .LBB3_1 @@ -149,10 +146,9 @@ define void @atomic_fetch_or32() nounwind { ; X86-NEXT: orl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB3_2 ; X86-NEXT: jmp .LBB3_1 ; X86-NEXT: .LBB3_2: # %atomicrmw.end @@ -179,9 +175,8 @@ define void @atomic_fetch_xor32() nounwind { ; X64-NEXT: xorl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB4_2 ; X64-NEXT: jmp .LBB4_1 @@ -203,10 +198,9 @@ define void @atomic_fetch_xor32() nounwind { ; X86-NEXT: xorl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB4_2 ; X86-NEXT: jmp .LBB4_1 ; X86-NEXT: .LBB4_2: # %atomicrmw.end @@ -223,15 +217,16 @@ define void @atomic_fetch_xor32() nounwind { define void @atomic_fetch_nand32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_nand32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB5_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl %edx, %ecx +; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: notl %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl @@ -244,26 +239,27 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; ; X86-LABEL: atomic_fetch_nand32: ; X86: # %bb.0: -; X86-NEXT: subl $8, %esp +; X86-NEXT: subl $12, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl sc32, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movl sc32, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: .LBB5_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl %edx, %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-NEXT: notl %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl ; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB5_2 ; X86-NEXT: jmp .LBB5_1 ; X86-NEXT: .LBB5_2: # %atomicrmw.end -; X86-NEXT: addl $8, %esp +; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ret void @@ -272,21 +268,21 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { define void @atomic_fetch_max32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_max32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB6_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmovgl %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 ; X64-NEXT: jmp .LBB6_1 ; X64-NEXT: .LBB6_2: # %atomicrmw.end @@ -296,21 +292,21 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovgl %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmovgl %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end @@ -319,21 +315,19 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jg .LBB6_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 @@ -341,39 +335,33 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB6_2 ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_max32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB6_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jg .LBB6_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1 @@ -381,20 +369,16 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB6_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB6_2 ; X86-NOX87-NEXT: jmp .LBB6_1 ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void @@ -403,21 +387,21 @@ define void @atomic_fetch_max32(i32 %x) nounwind { define void @atomic_fetch_min32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_min32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB7_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovlel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmovlel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 ; X64-NEXT: jmp .LBB7_1 ; X64-NEXT: .LBB7_2: # %atomicrmw.end @@ -427,21 +411,21 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovlel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmovlel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end @@ -450,21 +434,19 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jle .LBB7_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 @@ -472,39 +454,33 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB7_2 ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_min32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB7_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jle .LBB7_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1 @@ -512,20 +488,16 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB7_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB7_2 ; X86-NOX87-NEXT: jmp .LBB7_1 ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void @@ -534,21 +506,21 @@ define void @atomic_fetch_min32(i32 %x) nounwind { define void @atomic_fetch_umax32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_umax32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB8_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmoval %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmoval %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB8_2 ; X64-NEXT: jmp .LBB8_1 ; X64-NEXT: .LBB8_2: # %atomicrmw.end @@ -558,21 +530,21 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmoval %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmoval %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB8_2 ; X86-CMOV-NEXT: jmp .LBB8_1 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end @@ -581,21 +553,19 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: ja .LBB8_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 @@ -603,39 +573,33 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB8_2 ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umax32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB8_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: ja .LBB8_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1 @@ -643,20 +607,16 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB8_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB8_2 ; X86-NOX87-NEXT: jmp .LBB8_1 ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void @@ -665,21 +625,21 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_umin32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB9_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovbel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmovbel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB9_2 ; X64-NEXT: jmp .LBB9_1 ; X64-NEXT: .LBB9_2: # %atomicrmw.end @@ -689,21 +649,21 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovbel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmovbel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB9_2 ; X86-CMOV-NEXT: jmp .LBB9_1 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end @@ -712,21 +672,19 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jbe .LBB9_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 @@ -734,39 +692,33 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB9_2 ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umin32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB9_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jbe .LBB9_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1 @@ -774,20 +726,16 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB9_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB9_2 ; X86-NOX87-NEXT: jmp .LBB9_1 ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll index 963561dc8deb2..452bcb254e0d5 100644 --- a/llvm/test/CodeGen/X86/atomic64.ll +++ b/llvm/test/CodeGen/X86/atomic64.ll @@ -17,46 +17,37 @@ define void @atomic_fetch_add64() nounwind { ; ; I486-LABEL: atomic_fetch_add64: ; I486: # %bb.0: # %entry -; I486-NEXT: pushl %esi -; I486-NEXT: subl $48, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $1, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $1, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $3, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: addl $48, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl entry: %t1 = atomicrmw add i64* @sc64, i64 1 acquire @@ -78,46 +69,37 @@ define void @atomic_fetch_sub64() nounwind { ; ; I486-LABEL: atomic_fetch_sub64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $48, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $1, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $1, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $3, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: addl $48, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw sub i64* @sc64, i64 1 acquire %t2 = atomicrmw sub i64* @sc64, i64 3 acquire @@ -140,9 +122,8 @@ define void @atomic_fetch_and64() nounwind { ; X64-NEXT: # kill: def $rcx killed $ecx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -153,36 +134,30 @@ define void @atomic_fetch_and64() nounwind { ; ; I486-LABEL: atomic_fetch_and64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $36, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $3, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_and_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_and_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_and_8 -; I486-NEXT: addl $36, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw and i64* @sc64, i64 3 acquire %t2 = atomicrmw and i64* @sc64, i64 5 acquire @@ -203,9 +178,8 @@ define void @atomic_fetch_or64() nounwind { ; X64-NEXT: orq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: jmp .LBB3_1 @@ -216,36 +190,30 @@ define void @atomic_fetch_or64() nounwind { ; ; I486-LABEL: atomic_fetch_or64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $36, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $3, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_or_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_or_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_or_8 -; I486-NEXT: addl $36, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw or i64* @sc64, i64 3 acquire %t2 = atomicrmw or i64* @sc64, i64 5 acquire @@ -266,9 +234,8 @@ define void @atomic_fetch_xor64() nounwind { ; X64-NEXT: xorq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB4_2 ; X64-NEXT: jmp .LBB4_1 @@ -279,36 +246,30 @@ define void @atomic_fetch_xor64() nounwind { ; ; I486-LABEL: atomic_fetch_xor64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $36, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $3, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_xor_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_xor_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_xor_8 -; I486-NEXT: addl $36, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw xor i64* @sc64, i64 3 acquire %t2 = atomicrmw xor i64* @sc64, i64 5 acquire @@ -319,15 +280,16 @@ define void @atomic_fetch_xor64() nounwind { define void @atomic_fetch_nand64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_nand64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB5_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: movq %rax, %rcx ; X64-NEXT: andq %rdx, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: notq %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl @@ -340,20 +302,17 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { ; ; I486-LABEL: atomic_fetch_nand64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $20, %esp -; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: subl $16, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: leal sc64, %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %eax, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_nand_8 -; I486-NEXT: addl $20, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw nand i64* @sc64, i64 %x acquire ret void @@ -362,21 +321,21 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { define void @atomic_fetch_max64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_max64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB6_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovgq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovgq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB6_2 ; X64-NEXT: jmp .LBB6_1 ; X64-NEXT: .LBB6_2: # %atomicrmw.end @@ -386,70 +345,65 @@ define void @atomic_fetch_max64(i64 %x) nounwind { ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB6_1 ; I486-NEXT: .LBB6_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jl .LBB6_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB6_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB6_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB6_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB6_1 ; I486-NEXT: jmp .LBB6_2 ; I486-NEXT: .LBB6_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw max i64* @sc64, i64 %x acquire @@ -460,21 +414,21 @@ define void @atomic_fetch_max64(i64 %x) nounwind { define void @atomic_fetch_min64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_min64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB7_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovleq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovleq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB7_2 ; X64-NEXT: jmp .LBB7_1 ; X64-NEXT: .LBB7_2: # %atomicrmw.end @@ -484,70 +438,65 @@ define void @atomic_fetch_min64(i64 %x) nounwind { ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB7_1 ; I486-NEXT: .LBB7_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jge .LBB7_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB7_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB7_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB7_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB7_1 ; I486-NEXT: jmp .LBB7_2 ; I486-NEXT: .LBB7_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw min i64* @sc64, i64 %x acquire @@ -558,21 +507,21 @@ define void @atomic_fetch_min64(i64 %x) nounwind { define void @atomic_fetch_umax64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_umax64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB8_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovaq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovaq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB8_2 ; X64-NEXT: jmp .LBB8_1 ; X64-NEXT: .LBB8_2: # %atomicrmw.end @@ -582,70 +531,65 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB8_1 ; I486-NEXT: .LBB8_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jb .LBB8_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB8_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB8_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB8_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB8_1 ; I486-NEXT: jmp .LBB8_2 ; I486-NEXT: .LBB8_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw umax i64* @sc64, i64 %x acquire @@ -656,21 +600,21 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { define void @atomic_fetch_umin64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_umin64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB9_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovbeq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovbeq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB9_2 ; X64-NEXT: jmp .LBB9_1 ; X64-NEXT: .LBB9_2: # %atomicrmw.end @@ -680,70 +624,65 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB9_1 ; I486-NEXT: .LBB9_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jae .LBB9_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB9_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB9_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB9_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB9_1 ; I486-NEXT: jmp .LBB9_2 ; I486-NEXT: .LBB9_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw umin i64* @sc64, i64 %x acquire @@ -765,19 +704,18 @@ define void @atomic_fetch_cmpxchg64() nounwind { ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp ; I486-NEXT: andl $-8, %esp -; I486-NEXT: subl $40, %esp +; I486-NEXT: subl $32, %esp ; I486-NEXT: leal sc64, %eax ; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx ; I486-NEXT: movl $0, {{[0-9]+}}(%esp) ; I486-NEXT: movl $0, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edx -; I486-NEXT: movl %ecx, 4(%edx) -; I486-NEXT: movl $2, 20(%edx) -; I486-NEXT: movl $2, 16(%edx) -; I486-NEXT: movl $0, 12(%edx) -; I486-NEXT: movl $1, 8(%edx) -; I486-NEXT: movl $sc64, (%edx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $0, 12(%eax) +; I486-NEXT: movl $1, 8(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 ; I486-NEXT: movl %ebp, %esp ; I486-NEXT: popl %ebp @@ -794,20 +732,17 @@ define void @atomic_fetch_store64(i64 %x) nounwind { ; ; I486-LABEL: atomic_fetch_store64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $20, %esp -; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: subl $16, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: leal sc64, %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %eax, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $3, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $3, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_store_8 -; I486-NEXT: addl $20, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl store atomic i64 %x, i64* @sc64 release, align 8 ret void @@ -821,20 +756,17 @@ define void @atomic_fetch_swap64(i64 %x) nounwind { ; ; I486-LABEL: atomic_fetch_swap64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $20, %esp -; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: subl $16, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: leal sc64, %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %eax, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_exchange_8 -; I486-NEXT: addl $20, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire ret void @@ -851,23 +783,20 @@ define void @atomic_fetch_swapf64(double %x) nounwind { ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp -; I486-NEXT: subl $40, %esp +; I486-NEXT: subl $24, %esp ; I486-NEXT: fldl 8(%ebp) ; I486-NEXT: leal fsc64, %eax ; I486-NEXT: fstpl {{[0-9]+}}(%esp) ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx ; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $fsc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $fsc64, (%eax) ; I486-NEXT: calll __atomic_exchange_8 -; I486-NEXT: leal -4(%ebp), %esp -; I486-NEXT: popl %esi +; I486-NEXT: movl %ebp, %esp ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw xchg double* @fsc64, double %x acquire diff --git a/llvm/test/CodeGen/X86/atomic6432.ll b/llvm/test/CodeGen/X86/atomic6432.ll index 31cc795368244..b83d7ba09ac3c 100644 --- a/llvm/test/CodeGen/X86/atomic6432.ll +++ b/llvm/test/CodeGen/X86/atomic6432.ll @@ -7,106 +7,98 @@ define void @atomic_fetch_add64() nounwind { ; X32-LABEL: atomic_fetch_add64: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $56, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_1 ; X32-NEXT: .LBB0_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_1 ; X32-NEXT: jmp .LBB0_2 ; X32-NEXT: .LBB0_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_3 ; X32-NEXT: .LBB0_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $3, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_3 ; X32-NEXT: jmp .LBB0_4 ; X32-NEXT: .LBB0_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_5 ; X32-NEXT: .LBB0_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $5, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $5, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_5 ; X32-NEXT: jmp .LBB0_6 ; X32-NEXT: .LBB0_6: # %atomicrmw.end7 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_7 ; X32-NEXT: .LBB0_7: # %atomicrmw.start14 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_7 ; X32-NEXT: jmp .LBB0_8 ; X32-NEXT: .LBB0_8: # %atomicrmw.end13 -; X32-NEXT: addl $56, %esp +; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl entry: @@ -121,106 +113,98 @@ define void @atomic_fetch_sub64() nounwind { ; X32-LABEL: atomic_fetch_sub64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $56, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_1 ; X32-NEXT: .LBB1_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $-1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $-1, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $-1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_1 ; X32-NEXT: jmp .LBB1_2 ; X32-NEXT: .LBB1_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_3 ; X32-NEXT: .LBB1_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $-3, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $-1, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $-3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_3 ; X32-NEXT: jmp .LBB1_4 ; X32-NEXT: .LBB1_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_5 ; X32-NEXT: .LBB1_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $-5, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $-1, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $-5, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_5 ; X32-NEXT: jmp .LBB1_6 ; X32-NEXT: .LBB1_6: # %atomicrmw.end7 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_7 ; X32-NEXT: .LBB1_7: # %atomicrmw.start14 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: subl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: sbbl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: subl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: sbbl %esi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_7 ; X32-NEXT: jmp .LBB1_8 ; X32-NEXT: .LBB1_8: # %atomicrmw.end13 -; X32-NEXT: addl $56, %esp +; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw sub i64* @sc64, i64 1 acquire @@ -234,83 +218,75 @@ define void @atomic_fetch_and64() nounwind { ; X32-LABEL: atomic_fetch_and64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $44, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $52, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB2_1 ; X32-NEXT: .LBB2_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: xorl %esi, %esi -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB2_1 ; X32-NEXT: jmp .LBB2_2 ; X32-NEXT: .LBB2_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB2_3 ; X32-NEXT: .LBB2_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: andl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: andl $1, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB2_3 ; X32-NEXT: jmp .LBB2_4 ; X32-NEXT: .LBB2_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB2_5 ; X32-NEXT: .LBB2_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: andl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: andl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB2_5 ; X32-NEXT: jmp .LBB2_6 ; X32-NEXT: .LBB2_6: # %atomicrmw.end7 -; X32-NEXT: addl $44, %esp +; X32-NEXT: addl $52, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw and i64* @sc64, i64 3 acquire @@ -323,84 +299,75 @@ define void @atomic_fetch_or64() nounwind { ; X32-LABEL: atomic_fetch_or64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $52, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB3_1 ; X32-NEXT: .LBB3_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: orl $3, %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: orl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB3_1 ; X32-NEXT: jmp .LBB3_2 ; X32-NEXT: .LBB3_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB3_3 ; X32-NEXT: .LBB3_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: orl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: orl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: orl $1, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB3_3 ; X32-NEXT: jmp .LBB3_4 ; X32-NEXT: .LBB3_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB3_5 ; X32-NEXT: .LBB3_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: orl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: orl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: orl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB3_5 ; X32-NEXT: jmp .LBB3_6 ; X32-NEXT: .LBB3_6: # %atomicrmw.end7 -; X32-NEXT: addl $48, %esp +; X32-NEXT: addl $52, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw or i64* @sc64, i64 3 acquire @@ -413,84 +380,75 @@ define void @atomic_fetch_xor64() nounwind { ; X32-LABEL: atomic_fetch_xor64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $52, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB4_1 ; X32-NEXT: .LBB4_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: xorl $3, %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB4_1 ; X32-NEXT: jmp .LBB4_2 ; X32-NEXT: .LBB4_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB4_3 ; X32-NEXT: .LBB4_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: xorl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: xorl $1, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB4_3 ; X32-NEXT: jmp .LBB4_4 ; X32-NEXT: .LBB4_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB4_5 ; X32-NEXT: .LBB4_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: xorl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: xorl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB4_5 ; X32-NEXT: jmp .LBB4_6 ; X32-NEXT: .LBB4_6: # %atomicrmw.end7 -; X32-NEXT: addl $48, %esp +; X32-NEXT: addl $52, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw xor i64* @sc64, i64 3 acquire @@ -505,36 +463,39 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $16, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB5_1 ; X32-NEXT: .LBB5_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: andl %esi, %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: andl %ebx, %edi -; X32-NEXT: notl %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: andl %edi, %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl %esi, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: notl %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: notl %ecx -; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB5_1 ; X32-NEXT: jmp .LBB5_2 ; X32-NEXT: .LBB5_2: # %atomicrmw.end -; X32-NEXT: addl $16, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -547,42 +508,41 @@ define void @atomic_fetch_max64(i64 %x) nounwind { ; X32-LABEL: atomic_fetch_max64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB6_1 ; X32-NEXT: .LBB6_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovll %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovll %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovll %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovll %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB6_1 ; X32-NEXT: jmp .LBB6_2 ; X32-NEXT: .LBB6_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw max i64* @sc64, i64 %x acquire @@ -593,42 +553,41 @@ define void @atomic_fetch_min64(i64 %x) nounwind { ; X32-LABEL: atomic_fetch_min64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB7_1 ; X32-NEXT: .LBB7_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovgel %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovgel %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovgel %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovgel %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB7_1 ; X32-NEXT: jmp .LBB7_2 ; X32-NEXT: .LBB7_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw min i64* @sc64, i64 %x acquire @@ -639,42 +598,41 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { ; X32-LABEL: atomic_fetch_umax64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB8_1 ; X32-NEXT: .LBB8_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovbl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovbl %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovbl %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovbl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB8_1 ; X32-NEXT: jmp .LBB8_2 ; X32-NEXT: .LBB8_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw umax i64* @sc64, i64 %x acquire @@ -685,42 +643,41 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { ; X32-LABEL: atomic_fetch_umin64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB9_1 ; X32-NEXT: .LBB9_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovael %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovael %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovael %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovael %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB9_1 ; X32-NEXT: jmp .LBB9_2 ; X32-NEXT: .LBB9_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw umin i64* @sc64, i64 %x acquire @@ -731,14 +688,11 @@ define void @atomic_fetch_cmpxchg64() nounwind { ; X32-LABEL: atomic_fetch_cmpxchg64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %eax -; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: movl $1, %ebx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: addl $4, %esp ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire @@ -763,24 +717,24 @@ define void @atomic_fetch_swap64(i64 %x) nounwind { ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %edx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB12_1 ; X32-NEXT: .LBB12_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB12_1 ; X32-NEXT: jmp .LBB12_2 ; X32-NEXT: .LBB12_2: # %atomicrmw.end diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll index f448bfec2ec99..7bd255c130259 100644 --- a/llvm/test/CodeGen/X86/avx-load-store.ll +++ b/llvm/test/CodeGen/X86/avx-load-store.ll @@ -34,27 +34,27 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* ; ; CHECK_O0-LABEL: test_256_load: ; CHECK_O0: # %bb.0: # %entry -; CHECK_O0-NEXT: subq $152, %rsp +; CHECK_O0-NEXT: subq $184, %rsp +; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0 +; CHECK_O0-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill ; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1 -; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2 -; CHECK_O0-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2 ; CHECK_O0-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK_O0-NEXT: callq dummy -; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload -; CHECK_O0-NEXT: vmovapd %ymm0, (%rax) -; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK_O0-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload +; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; CHECK_O0-NEXT: vmovaps %ymm1, (%rcx) +; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdx) -; CHECK_O0-NEXT: addq $152, %rsp +; CHECK_O0-NEXT: vmovapd %ymm2, (%rdi) +; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi) +; CHECK_O0-NEXT: vmovdqa %ymm0, (%rdx) +; CHECK_O0-NEXT: addq $184, %rsp ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq entry: @@ -173,9 +173,10 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp ; ; CHECK_O0-LABEL: double_save: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq @@ -195,9 +196,10 @@ define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nou ; ; CHECK_O0-LABEL: double_save_volatile: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq @@ -272,11 +274,11 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { ; ; CHECK_O0-LABEL: add8i32: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm0 +; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm2 ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1 -; CHECK_O0-NEXT: # implicit-def: $ymm2 -; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq @@ -317,11 +319,11 @@ define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { ; ; CHECK_O0-LABEL: add4i64a16: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm0 +; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm2 ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1 -; CHECK_O0-NEXT: # implicit-def: $ymm2 -; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll index 186370ca675c7..1bc5e104512ea 100755 --- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -19,44 +19,41 @@ define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %f ; CHECK: ## %bb.0: ; CHECK-NEXT: subq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: movl $8, %eax -; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; CHECK-NEXT: vpmovw2m %xmm0, %k0 ; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: callq _calc_expected_mask_val +; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload ; CHECK-NEXT: ## kill: def $eax killed $eax killed $rax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: movzwl %ax, %esi -; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; CHECK-NEXT: kmovb %k0, %edi -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload -; CHECK-NEXT: vpmovd2m %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %k1 -; CHECK-NEXT: kmovd %k0, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload +; CHECK-NEXT: vpmovd2m %xmm0, %k0 +; CHECK-NEXT: ## kill: def $k1 killed $k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: movl $4, %edx ; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val -; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax -; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload -; CHECK-NEXT: movzwl %cx, %edi -; CHECK-NEXT: movzwl %ax, %esi +; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %si ## 2-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload +; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax +; CHECK-NEXT: movzwl %si, %edi +; CHECK-NEXT: movzwl %ax, %esi ; CHECK-NEXT: callq _check_mask16 ; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bug47278-eflags-error.mir b/llvm/test/CodeGen/X86/bug47278-eflags-error.mir new file mode 100644 index 0000000000000..e4e68451850b7 --- /dev/null +++ b/llvm/test/CodeGen/X86/bug47278-eflags-error.mir @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s + +# Test for correct management of allocatable and non-allocatable +# live-ins in fastregalloc + +--- +name: live_through_ecx +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: live_through_ecx + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $ecx + ; CHECK: NOOP implicit $ecx + ; CHECK: bb.1: + ; CHECK: liveins: $ecx + ; CHECK: RET implicit killed $ecx + bb.0: + liveins: $ecx + NOOP implicit $ecx + + bb.1: + liveins: $ecx + + RET implicit $ecx + +... + +--- +name: live_out_ecx +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: live_out_ecx + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $eax, $ebx + ; CHECK: renamable $ecx = COPY killed $ebx + ; CHECK: bb.1: + ; CHECK: liveins: $ecx + ; CHECK: RET implicit killed $ecx + bb.0: + liveins: $eax, $ebx + %0:gr32 = COPY $eax + %1:gr32 = COPY $ebx + $ecx = COPY %1 + + bb.1: + liveins: $ecx + + RET implicit $ecx + +... + +--- +name: live_out_eflags +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: live_out_eflags + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $eax, $ebx + ; CHECK: TEST32rr killed renamable $eax, killed renamable $ebx, implicit-def $eflags + ; CHECK: bb.1: + ; CHECK: liveins: $eflags + ; CHECK: RET implicit killed $eflags + bb.0: + liveins: $eax, $ebx + %0:gr32 = COPY $eax + %1:gr32 = COPY $ebx + TEST32rr %0, %1, implicit-def $eflags + + bb.1: + liveins: $eflags + + RET implicit $eflags + +... diff --git a/llvm/test/CodeGen/X86/bug47278.mir b/llvm/test/CodeGen/X86/bug47278.mir new file mode 100644 index 0000000000000..d2ac8f19a85e7 --- /dev/null +++ b/llvm/test/CodeGen/X86/bug47278.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s + +# Make sure this case doesn't assert or try to assign $ecx to %1 on +# SHRD32rrCL + +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: foo + ; CHECK: renamable $eax = IMPLICIT_DEF + ; CHECK: renamable $edx = MOVZX32rm8 renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 16) + ; CHECK: dead renamable $ecx = MOV32rm renamable $eax, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, align 16) + ; CHECK: renamable $al = MOV8rm killed renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4) + ; CHECK: dead renamable $ecx = COPY renamable $edx + ; CHECK: dead renamable $ecx = COPY renamable $edx + ; CHECK: dead renamable $ecx = COPY renamable $edx + ; CHECK: renamable $esi = IMPLICIT_DEF + ; CHECK: renamable $ecx = IMPLICIT_DEF + ; CHECK: renamable $ecx = CMOV32rr renamable $ecx, killed renamable $esi, 2, implicit undef $eflags + ; CHECK: renamable $cl = MOV8ri -128 + ; CHECK: $cl = IMPLICIT_DEF + ; CHECK: renamable $eax = COPY renamable $edx + ; CHECK: dead renamable $eax = SHRD32rrCL renamable $eax, killed renamable $edx, implicit-def dead $eflags, implicit killed $cl + ; CHECK: RETL + %0:gr32 = IMPLICIT_DEF + %1:gr32 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 16) + %2:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, align 16) + %3:gr8 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4) + %4:gr32 = COPY %1 + %5:gr32 = COPY %1 + %6:gr32 = COPY %1 + %7:gr32 = IMPLICIT_DEF + %8:gr32 = IMPLICIT_DEF + %8:gr32 = CMOV32rr %8, killed %7, 2, implicit undef $eflags + %9:gr8 = MOV8ri -128 + %9:gr8 = COPY %3 + $cl = IMPLICIT_DEF + %8:gr32 = COPY %1 + %8:gr32 = SHRD32rrCL %8, %1, implicit-def dead $eflags, implicit $cl + RETL + +... diff --git a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll index 9f9e5584d6f21..54f7c7597e503 100644 --- a/llvm/test/CodeGen/X86/crash-O0.ll +++ b/llvm/test/CodeGen/X86/crash-O0.ll @@ -16,14 +16,15 @@ define i32 @div8() nounwind { ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill ; CHECK-NEXT: ## implicit-def: $rcx ; CHECK-NEXT: ## kill: def $cl killed $cl killed $rcx -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill ; CHECK-NEXT: movzbw %al, %ax ; CHECK-NEXT: divb %cl -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %dl ## 1-byte Reload -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill -; CHECK-NEXT: movzbw %dl, %ax +; CHECK-NEXT: movb %al, %dl +; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movzbw %al, %ax ; CHECK-NEXT: divb %cl ; CHECK-NEXT: shrw $8, %ax ; CHECK-NEXT: ## kill: def $al killed $al killed $ax @@ -31,11 +32,11 @@ define i32 @div8() nounwind { ; CHECK-NEXT: jae LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %"39" ; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: ## implicit-def: $edx -; CHECK-NEXT: imull %edx, %ecx -; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: ## implicit-def: $ecx +; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: je LBB0_3 ; CHECK-NEXT: LBB0_2: ## %"40" ; CHECK-NEXT: ud2 @@ -79,12 +80,11 @@ define i64 @addressModeWith32bitIndex(i32 %V) { ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## kill: def $rax killed $eax -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: cqto -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload -; CHECK-NEXT: idivq (%rsi,%rcx,8) +; CHECK-NEXT: movslq %edi, %rsi +; CHECK-NEXT: idivq (%rcx,%rsi,8) ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq %gep = getelementptr i64, i64* null, i32 %V diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll index 664d9ded1e0e1..a66b74a19066b 100644 --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -8,7 +8,8 @@ bb: %tmp = load i32, i32* %p, align 4, !dbg !7 ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 - ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7 + ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax :: (store 8 into %stack.0) + ; CHECK-NEXT: SUB64ri8 renamable $rax, 3, implicit-def $eflags, debug-location !7 switch i32 %tmp, label %bb7 [ i32 0, label %bb1 diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll index e262448468ebd..9a54c8711f37b 100644 --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll @@ -19,7 +19,7 @@ exit: ; different basic block, so its operands aren't necessarily exported ; for cross-block usage. -; CHECK: movb %al, [[OFS:[0-9]*]](%rsp) +; CHECK: movb %cl, [[OFS:[0-9]*]](%rsp) ; CHECK: callq {{_?}}bar ; CHECK: movb [[OFS]](%rsp), %al diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll index 7fffa21f0d24d..56c2812481cac 100644 --- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -586,11 +586,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xfloat: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xfloat: @@ -628,11 +628,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt4xdouble: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xdouble: @@ -670,11 +670,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt32xi8: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi8: @@ -712,11 +712,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xi16: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi16: @@ -754,11 +754,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xi32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi32: @@ -796,11 +796,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt4xi64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xi64: @@ -889,6 +889,7 @@ define void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; + ; AVX512-LABEL: test_nt64xi8: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovntdq %zmm0, (%rdi) @@ -915,6 +916,7 @@ define void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; + ; AVX512-LABEL: test_nt32xi16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovntdq %zmm0, (%rdi) @@ -1008,16 +1010,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xfloat: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1062,16 +1064,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xdouble: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1116,16 +1118,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt64xi8: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1170,16 +1172,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt32xi16: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1224,16 +1226,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xi32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1278,16 +1280,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xi64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: diff --git a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll index 17d2803e9ce11..6f3643436e65d 100644 --- a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll +++ b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll @@ -65,12 +65,15 @@ define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) { define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_ogt_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpltss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpltss %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ogt_f32: @@ -93,12 +96,15 @@ define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) { define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ogt_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmpltsd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmpltsd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ogt_f64: @@ -121,12 +127,15 @@ define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) { define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_oge_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpless %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpless %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oge_f32: @@ -149,12 +158,15 @@ define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) { define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_oge_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmplesd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmplesd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oge_f64: @@ -501,12 +513,15 @@ define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) { define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_ult_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpnless %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpnless %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ult_f32: @@ -529,12 +544,15 @@ define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) { define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ult_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmpnlesd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmpnlesd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ult_f64: @@ -557,12 +575,15 @@ define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) { define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_ule_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpnltss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpnltss %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ule_f32: @@ -585,12 +606,15 @@ define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) { define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ule_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmpnltsd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmpnltsd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ule_f64: diff --git a/llvm/test/CodeGen/X86/fast-isel-select.ll b/llvm/test/CodeGen/X86/fast-isel-select.ll index 7865f9958ec53..5f65dde68a4a2 100644 --- a/llvm/test/CodeGen/X86/fast-isel-select.ll +++ b/llvm/test/CodeGen/X86/fast-isel-select.ll @@ -9,11 +9,11 @@ define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) { ; CHECK-LABEL: fastisel_select: ; CHECK: ## %bb.0: -; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi -; CHECK-NEXT: ## kill: def $dil killed $dil killed $edi +; CHECK-NEXT: movb %sil, %dl +; CHECK-NEXT: movb %dil, %cl ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: subb %dl, %cl +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: movl $1204476887, %ecx ## imm = 0x47CADBD7 ; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll index 30c8af288ac53..5d3c07fb46c31 100644 --- a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll @@ -299,8 +299,8 @@ define void @test23(i8* noalias sret %result) { ; CHECK-LABEL: test23: ; CHECK: movq %rdi, [[STACK:[0-9]+\(%rsp\)]] ; CHECK: call -; CHECK: movq [[STACK]], %rcx -; CHECK: movq %rcx, %rax +; CHECK-NEXT: movq [[STACK]], %rax +; CHECK-NEXT: addq $24, %rsp ; CHECK: ret } diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll index 14a233ed7fd4e..8392e3ed43f24 100644 --- a/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll +++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll @@ -61,12 +61,12 @@ define dso_local void @test_sign_ext(%struct.Foo* %f, i32* %i) { ; ; CHECK-O0-LABEL: test_sign_ext: ; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, %edx -; CHECK-O0-NEXT: sarl $31, %edx -; CHECK-O0-NEXT: movl %eax, 8(%ecx) -; CHECK-O0-NEXT: movl %edx, 12(%ecx) +; CHECK-O0-NEXT: movl %edx, %ecx +; CHECK-O0-NEXT: sarl $31, %ecx +; CHECK-O0-NEXT: movl %edx, 8(%eax) +; CHECK-O0-NEXT: movl %ecx, 12(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32* %i to i32 addrspace(272)* @@ -77,13 +77,21 @@ entry: } define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) { -; ALL-LABEL: test_zero_ext: -; ALL: # %bb.0: # %entry -; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax -; ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx -; ALL-NEXT: movl %eax, 8(%ecx) -; ALL-NEXT: movl $0, 12(%ecx) -; ALL-NEXT: jmp _use_foo # TAILCALL +; CHECK-LABEL: test_zero_ext: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %eax, 8(%ecx) +; CHECK-NEXT: movl $0, 12(%ecx) +; CHECK-NEXT: jmp _use_foo # TAILCALL +; +; CHECK-O0-LABEL: test_zero_ext: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, 8(%eax) +; CHECK-O0-NEXT: movl $0, 12(%eax) +; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(272)* %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 1 @@ -102,13 +110,10 @@ define dso_local void @test_trunc(%struct.Foo* %f, i32 addrspace(272)* %i) { ; ; CHECK-O0-LABEL: test_trunc: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushl %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %ecx, (%edx) -; CHECK-O0-NEXT: movl %eax, (%esp) # 4-byte Spill -; CHECK-O0-NEXT: popl %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(272)* %i to i32* @@ -119,12 +124,19 @@ entry: } define dso_local void @test_noop1(%struct.Foo* %f, i32* %i) { -; ALL-LABEL: test_noop1: -; ALL: # %bb.0: # %entry -; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax -; ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx -; ALL-NEXT: movl %eax, (%ecx) -; ALL-NEXT: jmp _use_foo # TAILCALL +; CHECK-LABEL: test_noop1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %eax, (%ecx) +; CHECK-NEXT: jmp _use_foo # TAILCALL +; +; CHECK-O0-LABEL: test_noop1: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) +; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 0 store i32* %i, i32** %p32, align 8 @@ -144,11 +156,11 @@ define dso_local void @test_noop2(%struct.Foo* %f, i32 addrspace(272)* %i) { ; ; CHECK-O0-LABEL: test_noop2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %ecx, 8(%edx) -; CHECK-O0-NEXT: movl %eax, 12(%edx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %edx, 8(%eax) +; CHECK-O0-NEXT: movl %ecx, 12(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 1 @@ -171,11 +183,11 @@ define dso_local void @test_null_arg(%struct.Foo* %f) { ; CHECK-O0-LABEL: test_null_arg: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: subl $12, %esp -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl %esp, %ecx -; CHECK-O0-NEXT: movl %eax, (%ecx) -; CHECK-O0-NEXT: movl $0, 8(%ecx) -; CHECK-O0-NEXT: movl $0, 4(%ecx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-O0-NEXT: movl %esp, %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) +; CHECK-O0-NEXT: movl $0, 8(%eax) +; CHECK-O0-NEXT: movl $0, 4(%eax) ; CHECK-O0-NEXT: calll _test_noop2 ; CHECK-O0-NEXT: addl $12, %esp ; CHECK-O0-NEXT: retl @@ -196,12 +208,12 @@ define dso_local void @test_unrecognized(%struct.Foo* %f, i32 addrspace(14)* %i) ; ; CHECK-O0-LABEL: test_unrecognized: ; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, %edx -; CHECK-O0-NEXT: sarl $31, %edx -; CHECK-O0-NEXT: movl %eax, 8(%ecx) -; CHECK-O0-NEXT: movl %edx, 12(%ecx) +; CHECK-O0-NEXT: movl %edx, %ecx +; CHECK-O0-NEXT: sarl $31, %ecx +; CHECK-O0-NEXT: movl %edx, 8(%eax) +; CHECK-O0-NEXT: movl %ecx, 12(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(272)* @@ -221,13 +233,10 @@ define dso_local void @test_unrecognized2(%struct.Foo* %f, i32 addrspace(272)* % ; ; CHECK-O0-LABEL: test_unrecognized2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushl %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %ecx, 16(%edx) -; CHECK-O0-NEXT: movl %eax, (%esp) # 4-byte Spill -; CHECK-O0-NEXT: popl %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, 16(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(272)* %i to i32 addrspace(9)* @@ -238,32 +247,22 @@ entry: } define i32 @test_load_sptr32(i32 addrspace(270)* %i) { -; CHECK-LABEL: test_load_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl (%eax), %eax -; CHECK-NEXT: retl -; CHECK-O0-LABEL: test_load_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl (%eax), %eax -; CHECK-O0-NEXT: retl +; ALL-LABEL: test_load_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax +; ALL-NEXT: movl (%eax), %eax +; ALL-NEXT: retl entry: %0 = load i32, i32 addrspace(270)* %i, align 4 ret i32 %0 } define i32 @test_load_uptr32(i32 addrspace(271)* %i) { -; CHECK-LABEL: test_load_uptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl (%eax), %eax -; CHECK-NEXT: retl -; CHECK-O0-LABEL: test_load_uptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl (%eax), %eax -; CHECK-O0-NEXT: retl +; ALL-LABEL: test_load_uptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax +; ALL-NEXT: movl (%eax), %eax +; ALL-NEXT: retl entry: %0 = load i32, i32 addrspace(271)* %i, align 4 ret i32 %0 @@ -275,15 +274,12 @@ define i32 @test_load_ptr64(i32 addrspace(272)* %i) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl (%eax), %eax ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_load_ptr64: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushl %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl (%ecx), %ecx -; CHECK-O0-NEXT: movl %eax, (%esp) -; CHECK-O0-NEXT: movl %ecx, %eax -; CHECK-O0-NEXT: popl %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl (%eax), %eax ; CHECK-O0-NEXT: retl entry: %0 = load i32, i32 addrspace(272)* %i, align 8 @@ -297,11 +293,12 @@ define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_store_sptr32: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, (%ecx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: retl entry: store i32 %i, i32 addrspace(270)* %s, align 4 @@ -315,11 +312,12 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_store_uptr32: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, (%ecx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: retl entry: store i32 %i, i32 addrspace(271)* %s, align 4 @@ -333,12 +331,13 @@ define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_store_ptr64: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %edx, (%ecx) +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: retl entry: store i32 %i, i32 addrspace(272)* %s, align 8 diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll index b452606484b66..76f775b834e0b 100644 --- a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll +++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll @@ -88,8 +88,8 @@ define dso_local void @test_trunc(%struct.Foo* %f, i32* %i) { ; ; CHECK-O0-LABEL: test_trunc: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $edx killed $edx killed $rdx -; CHECK-O0-NEXT: movl %edx, (%rcx) +; CHECK-O0-NEXT: movl %edx, %eax +; CHECK-O0-NEXT: movl %eax, (%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32* %i to i32 addrspace(270)* @@ -150,8 +150,8 @@ define void @test_unrecognized(%struct.Foo* %f, i32 addrspace(14)* %i) { ; ; CHECK-O0-LABEL: test_unrecognized: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $edx killed $edx killed $rdx -; CHECK-O0-NEXT: movl %edx, (%rcx) +; CHECK-O0-NEXT: movl %edx, %eax +; CHECK-O0-NEXT: movl %eax, (%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(270)* @@ -183,16 +183,11 @@ entry: } define i32 @test_load_sptr32(i32 addrspace(270)* %i) { -; CHECK-LABEL: test_load_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl (%rax), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl (%rax), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(270)* %i, align 4 ret i32 %0 @@ -204,6 +199,7 @@ define i32 @test_load_uptr32(i32 addrspace(271)* %i) { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl (%rax), %eax ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_load_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax @@ -216,30 +212,21 @@ entry: } define i32 @test_load_ptr64(i32 addrspace(272)* %i) { -; CHECK-LABEL: test_load_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl (%rcx), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl (%rcx), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl (%rcx), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(272)* %i, align 8 ret i32 %0 } define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) { -; CHECK-LABEL: test_store_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl %edx, (%rax) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl %edx, (%rax) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl %edx, (%rax) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(270)* %s, align 4 ret void @@ -251,6 +238,7 @@ define void @test_store_uptr32(i32 addrspace(271)* %s, i32 %i) { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_store_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax @@ -263,14 +251,10 @@ entry: } define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) { -; CHECK-LABEL: test_store_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edx, (%rcx) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl %edx, (%rcx) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl %edx, (%rcx) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(272)* %s, align 8 ret void diff --git a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll index 2a129bc643b36..a1fc3f9831769 100644 --- a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll +++ b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s ; RUN: llc -O0 < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -no-x86-call-frame-opt | FileCheck %s -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck -check-prefix=ATOM %s -; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling. +; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s @.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] @@ -17,18 +16,6 @@ entry: ; CHECK-NOT: movl ; CHECK: addl %ebx, %eax -; On Intel Atom the scheduler moves a movl instruction -; used for the printf call to follow movl 24(%esp), %eax -; ATOM: movl 24(%esp), %eax -; ATOM-NOT: movl -; ATOM: movl %eax, 36(%esp) -; ATOM: movl -; ATOM: movl 28(%esp), %ebx -; ATOM-NOT: movl -; ATOM: movl %ebx, 40(%esp) -; ATOM-NOT: movl -; ATOM: addl %ebx, %eax - %retval = alloca i32 ; [#uses=2] %"%ebx" = alloca i32 ; [#uses=1] %"%eax" = alloca i32 ; [#uses=2] diff --git a/llvm/test/CodeGen/X86/pr11415.ll b/llvm/test/CodeGen/X86/pr11415.ll index b3d9b2ff4839c..ee632189ef9ce 100644 --- a/llvm/test/CodeGen/X86/pr11415.ll +++ b/llvm/test/CodeGen/X86/pr11415.ll @@ -6,12 +6,11 @@ ; CHECK: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: movq %rax, -8(%rsp) -; CHECK-NEXT: movq -8(%rsp), %rdx +; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rcx, -8(%rsp) +; CHECK-NEXT: movq -8(%rsp), %rax ; CHECK-NEXT: ret define i64 @foo() { diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll index d1148eecb0da9..978164fdafdbc 100644 --- a/llvm/test/CodeGen/X86/pr1489.ll +++ b/llvm/test/CodeGen/X86/pr1489.ll @@ -110,28 +110,25 @@ define i32 @main() nounwind { ; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $48, %esp +; CHECK-NEXT: subl $32, %esp ; CHECK-NEXT: calll _baz -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: calll _bar -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: calll _foo ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: calll _quux -; CHECK-NEXT: movl %esp, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; CHECK-NEXT: movl %edx, 16(%ecx) -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; CHECK-NEXT: movl %esi, 12(%ecx) -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl %edi, 8(%ecx) -; CHECK-NEXT: movl %eax, 4(%ecx) -; CHECK-NEXT: movl $_.str, (%ecx) +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %edi, 16(%eax) +; CHECK-NEXT: movl %esi, 12(%eax) +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl %ecx, 4(%eax) +; CHECK-NEXT: movl $_.str, (%eax) ; CHECK-NEXT: calll _printf -; CHECK-NEXT: ## implicit-def: $ecx -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: addl $48, %esp +; CHECK-NEXT: ## implicit-def: $eax +; CHECK-NEXT: addl $32, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll index 7455584ac698a..a925bb8dfd6a2 100644 --- a/llvm/test/CodeGen/X86/pr27591.ll +++ b/llvm/test/CodeGen/X86/pr27591.ll @@ -9,9 +9,8 @@ define void @test1(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %edi +; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: callq callee1 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq @@ -27,10 +26,9 @@ define void @test2(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: negl %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %edi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: negl %edi ; CHECK-NEXT: callq callee2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr30430.ll b/llvm/test/CodeGen/X86/pr30430.ll index e524245daa112..7f771c955fbea 100644 --- a/llvm/test/CodeGen/X86/pr30430.ll +++ b/llvm/test/CodeGen/X86/pr30430.ll @@ -12,13 +12,13 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float ; CHECK-NEXT: andq $-64, %rsp ; CHECK-NEXT: subq $256, %rsp # imm = 0x100 ; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) @@ -27,75 +27,75 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float ; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[0] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] -; CHECK-NEXT: # implicit-def: $ymm2 -; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0] +; CHECK-NEXT: # implicit-def: $ymm0 +; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] -; CHECK-NEXT: # implicit-def: $ymm3 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1 -; CHECK-NEXT: # implicit-def: $zmm2 -; CHECK-NEXT: vmovaps %ymm1, %ymm2 -; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 +; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[0] +; CHECK-NEXT: # implicit-def: $ymm0 +; CHECK-NEXT: vmovaps %xmm3, %xmm0 +; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2 +; CHECK-NEXT: # implicit-def: $zmm0 +; CHECK-NEXT: vmovaps %ymm2, %ymm0 +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; CHECK-NEXT: movq %rbp, %rsp diff --git a/llvm/test/CodeGen/X86/pr30813.ll b/llvm/test/CodeGen/X86/pr30813.ll index 7266c5bd8d015..e3e096bda6c28 100644 --- a/llvm/test/CodeGen/X86/pr30813.ll +++ b/llvm/test/CodeGen/X86/pr30813.ll @@ -1,8 +1,9 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s ; CHECK: patatino: ; CHECK: .cfi_startproc -; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]] -; CHECK: movq %r[[REG0]], ({{%r[abcd]x}}) +; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]] +; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x +; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}}) ; CHECK: retq define void @patatino() { diff --git a/llvm/test/CodeGen/X86/pr32241.ll b/llvm/test/CodeGen/X86/pr32241.ll index 1f3d273dfc416..6fb770b4a75e7 100644 --- a/llvm/test/CodeGen/X86/pr32241.ll +++ b/llvm/test/CodeGen/X86/pr32241.ll @@ -10,10 +10,10 @@ define i32 @_Z3foov() { ; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376 ; CHECK-NEXT: movw $19417, {{[0-9]+}}(%esp) # imm = 0x4BD9 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %lor.rhs ; CHECK-NEXT: xorl %eax, %eax @@ -21,11 +21,11 @@ define i32 @_Z3foov() { ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .LBB0_2: # %lor.end -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: cmpl %eax, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: setl %al ; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll index 533473663d73b..cb9b33ca907b8 100644 --- a/llvm/test/CodeGen/X86/pr32284.ll +++ b/llvm/test/CodeGen/X86/pr32284.ll @@ -178,17 +178,8 @@ define void @f1() { ; ; 686-O0-LABEL: f1: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %ebx -; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: pushl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: pushl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 16 ; 686-O0-NEXT: subl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 17 -; 686-O0-NEXT: .cfi_offset %esi, -16 -; 686-O0-NEXT: .cfi_offset %edi, -12 -; 686-O0-NEXT: .cfi_offset %ebx, -8 +; 686-O0-NEXT: .cfi_def_cfa_offset 5 ; 686-O0-NEXT: movl var_5, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: sarl $31, %ecx @@ -197,33 +188,27 @@ define void @f1() { ; 686-O0-NEXT: orl %ecx, %eax ; 686-O0-NEXT: setne (%esp) ; 686-O0-NEXT: movl var_5, %ecx +; 686-O0-NEXT: movl %ecx, %eax +; 686-O0-NEXT: sarl $31, %eax ; 686-O0-NEXT: movl %ecx, %edx -; 686-O0-NEXT: sarl $31, %edx -; 686-O0-NEXT: movl %ecx, %esi -; 686-O0-NEXT: subl $-1, %esi -; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %edi +; 686-O0-NEXT: subl $-1, %edx +; 686-O0-NEXT: sete %dl +; 686-O0-NEXT: movzbl %dl, %edx ; 686-O0-NEXT: addl $7093, %ecx # imm = 0x1BB5 -; 686-O0-NEXT: adcl $0, %edx -; 686-O0-NEXT: subl %edi, %ecx -; 686-O0-NEXT: sbbl $0, %edx -; 686-O0-NEXT: setl %bl -; 686-O0-NEXT: movzbl %bl, %edi -; 686-O0-NEXT: movl %edi, var_57 +; 686-O0-NEXT: adcl $0, %eax +; 686-O0-NEXT: subl %edx, %ecx +; 686-O0-NEXT: sbbl $0, %eax +; 686-O0-NEXT: setl %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movl %eax, var_57 ; 686-O0-NEXT: movl $0, var_57+4 -; 686-O0-NEXT: movl var_5, %edi -; 686-O0-NEXT: subl $-1, %edi -; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %ebx -; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E +; 686-O0-NEXT: movl var_5, %eax +; 686-O0-NEXT: subl $-1, %eax +; 686-O0-NEXT: sete %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movl %eax, _ZN8struct_210member_2_0E ; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4 ; 686-O0-NEXT: addl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 16 -; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: popl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: popl %ebx ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -321,9 +306,9 @@ define void @f2() { ; X86-O0-NEXT: sete %al ; X86-O0-NEXT: andb $1, %al ; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax -; X86-O0-NEXT: # implicit-def: $rcx -; X86-O0-NEXT: movw %ax, (%rcx) +; X86-O0-NEXT: movw %ax, %cx +; X86-O0-NEXT: # implicit-def: $rax +; X86-O0-NEXT: movw %cx, (%rax) ; X86-O0-NEXT: retq ; ; X64-LABEL: f2: @@ -368,9 +353,9 @@ define void @f2() { ; 686-O0-NEXT: sete %al ; 686-O0-NEXT: andb $1, %al ; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax -; 686-O0-NEXT: # implicit-def: $ecx -; 686-O0-NEXT: movw %ax, (%ecx) +; 686-O0-NEXT: movw %ax, %cx +; 686-O0-NEXT: # implicit-def: $eax +; 686-O0-NEXT: movw %cx, (%eax) ; 686-O0-NEXT: addl $2, %esp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl @@ -488,18 +473,18 @@ define void @f3() #0 { ; 686-O0-NEXT: andl $-8, %esp ; 686-O0-NEXT: subl $16, %esp ; 686-O0-NEXT: .cfi_offset %esi, -12 -; 686-O0-NEXT: movl var_13, %eax -; 686-O0-NEXT: movl %eax, %ecx -; 686-O0-NEXT: notl %ecx -; 686-O0-NEXT: testl %eax, %eax -; 686-O0-NEXT: sete %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movl var_16, %edx -; 686-O0-NEXT: movl %ecx, %esi -; 686-O0-NEXT: xorl %edx, %esi -; 686-O0-NEXT: andl %esi, %eax -; 686-O0-NEXT: orl %eax, %ecx -; 686-O0-NEXT: movl %ecx, (%esp) +; 686-O0-NEXT: movl var_13, %ecx +; 686-O0-NEXT: movl %ecx, %eax +; 686-O0-NEXT: notl %eax +; 686-O0-NEXT: testl %ecx, %ecx +; 686-O0-NEXT: sete %cl +; 686-O0-NEXT: movzbl %cl, %ecx +; 686-O0-NEXT: movl var_16, %esi +; 686-O0-NEXT: movl %eax, %edx +; 686-O0-NEXT: xorl %esi, %edx +; 686-O0-NEXT: andl %edx, %ecx +; 686-O0-NEXT: orl %ecx, %eax +; 686-O0-NEXT: movl %eax, (%esp) ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp) ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: notl %eax diff --git a/llvm/test/CodeGen/X86/pr32340.ll b/llvm/test/CodeGen/X86/pr32340.ll index 98685b959f642..15774d605e62f 100644 --- a/llvm/test/CodeGen/X86/pr32340.ll +++ b/llvm/test/CodeGen/X86/pr32340.ll @@ -16,26 +16,26 @@ define void @foo() { ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: # kill: def $rax killed $eax ; X64-NEXT: movw $0, var_825 -; X64-NEXT: movzwl var_32, %ecx -; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: xorl %edx, %esi -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: xorl %esi, %edx -; X64-NEXT: addl %ecx, %edx -; X64-NEXT: movslq %edx, %rcx +; X64-NEXT: movzwl var_32, %edx +; X64-NEXT: movzwl var_901, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: xorl %ecx, %esi +; X64-NEXT: movl %edx, %ecx +; X64-NEXT: xorl %esi, %ecx +; X64-NEXT: addl %edx, %ecx +; X64-NEXT: movslq %ecx, %rcx ; X64-NEXT: movq %rcx, var_826 ; X64-NEXT: movzwl var_32, %ecx ; X64-NEXT: # kill: def $rcx killed $ecx ; X64-NEXT: movzwl var_901, %edx ; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440 -; X64-NEXT: xorq %rsi, %rdx -; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: movslq %edx, %rsi +; X64-NEXT: movabsq $-1142377792914660288, %rdx # imm = 0xF02575732E06E440 ; X64-NEXT: xorq %rdx, %rsi -; X64-NEXT: xorq $-1, %rsi -; X64-NEXT: xorq %rsi, %rcx +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: xorq %rsi, %rdx +; X64-NEXT: xorq $-1, %rdx +; X64-NEXT: xorq %rdx, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: orq var_57, %rdx ; X64-NEXT: orq %rdx, %rcx diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll index 165e0292d4648..2182e8b4f901a 100644 --- a/llvm/test/CodeGen/X86/pr32345.ll +++ b/llvm/test/CodeGen/X86/pr32345.ll @@ -29,9 +29,9 @@ define void @foo() { ; X640-NEXT: # kill: def $rcx killed $ecx ; X640-NEXT: # kill: def $cl killed $rcx ; X640-NEXT: sarq %cl, %rax -; X640-NEXT: # kill: def $al killed $al killed $rax -; X640-NEXT: # implicit-def: $rcx -; X640-NEXT: movb %al, (%rcx) +; X640-NEXT: movb %al, %cl +; X640-NEXT: # implicit-def: $rax +; X640-NEXT: movb %cl, (%rax) ; X640-NEXT: retq ; ; 6860-LABEL: foo: @@ -43,44 +43,44 @@ define void @foo() { ; 6860-NEXT: .cfi_def_cfa_register %ebp ; 6860-NEXT: andl $-8, %esp ; 6860-NEXT: subl $24, %esp -; 6860-NEXT: movw var_22, %ax +; 6860-NEXT: movw var_22, %dx ; 6860-NEXT: movzwl var_27, %ecx -; 6860-NEXT: movw %cx, %dx -; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax +; 6860-NEXT: movw %cx, %ax +; 6860-NEXT: xorw %ax, %dx +; 6860-NEXT: # implicit-def: $eax +; 6860-NEXT: movw %dx, %ax +; 6860-NEXT: xorl %ecx, %eax +; 6860-NEXT: # kill: def $ax killed $ax killed $eax +; 6860-NEXT: movzwl %ax, %eax ; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) -; 6860-NEXT: movw var_22, %ax -; 6860-NEXT: movzwl var_27, %ecx -; 6860-NEXT: movw %cx, %dx -; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax -; 6860-NEXT: # kill: def $cl killed $cl killed $ecx +; 6860-NEXT: movw var_22, %dx +; 6860-NEXT: movzwl var_27, %eax +; 6860-NEXT: movw %ax, %cx +; 6860-NEXT: xorw %cx, %dx +; 6860-NEXT: # implicit-def: $ecx +; 6860-NEXT: movw %dx, %cx +; 6860-NEXT: xorl %eax, %ecx +; 6860-NEXT: # kill: def $cx killed $cx killed $ecx +; 6860-NEXT: movzwl %cx, %edx +; 6860-NEXT: movb %al, %cl ; 6860-NEXT: addb $30, %cl -; 6860-NEXT: xorl %edx, %edx ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; 6860-NEXT: shrdl %cl, %edx, %eax +; 6860-NEXT: xorl %eax, %eax +; 6860-NEXT: shrdl %cl, %eax, %edx ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: testb $32, %cl ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: .LBB0_2: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 6860-NEXT: # kill: def $al killed $al killed $eax -; 6860-NEXT: # implicit-def: $ecx -; 6860-NEXT: movb %al, (%ecx) +; 6860-NEXT: movb %al, %cl +; 6860-NEXT: # implicit-def: $eax +; 6860-NEXT: movb %cl, (%eax) ; 6860-NEXT: movl %ebp, %esp ; 6860-NEXT: popl %ebp ; 6860-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll index 3b1997234ce55..f12e85b9a177d 100644 --- a/llvm/test/CodeGen/X86/pr32451.ll +++ b/llvm/test/CodeGen/X86/pr32451.ll @@ -9,24 +9,24 @@ target triple = "x86_64-unknown-linux-gnu" define i8** @japi1_convert_690(i8**, i8***, i32) { ; CHECK-LABEL: japi1_convert_690: ; CHECK: # %bb.0: # %top -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll julia.gc_root_decl ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll jl_get_ptls_states -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl 4(%ecx), %edx -; CHECK-NEXT: movb (%edx), %dl -; CHECK-NEXT: andb $1, %dl -; CHECK-NEXT: movzbl %dl, %edx -; CHECK-NEXT: movl %edx, (%esp) -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: # kill: def $ecx killed $eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movl 4(%eax), %eax +; CHECK-NEXT: movb (%eax), %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll jl_box_int32 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl %eax, (%ecx) -; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl top: diff --git a/llvm/test/CodeGen/X86/pr32484.ll b/llvm/test/CodeGen/X86/pr32484.ll index ef504eee6e8bc..0df1c4b545078 100644 --- a/llvm/test/CodeGen/X86/pr32484.ll +++ b/llvm/test/CodeGen/X86/pr32484.ll @@ -8,9 +8,9 @@ define void @foo() { ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: # implicit-def: $rax -; CHECK-NEXT: movdqu %xmm1, (%rax) +; CHECK-NEXT: movdqu %xmm0, (%rax) ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: retq indirectbr i8* undef, [label %9, label %1] diff --git a/llvm/test/CodeGen/X86/pr34592.ll b/llvm/test/CodeGen/X86/pr34592.ll index 25b068c8fad6f..3c5345bf3411c 100644 --- a/llvm/test/CodeGen/X86/pr34592.ll +++ b/llvm/test/CodeGen/X86/pr34592.ll @@ -10,44 +10,42 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1 ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $160, %rsp -; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 -; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 -; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 -; CHECK-NEXT: vmovaps 144(%rbp), %ymm11 -; CHECK-NEXT: vmovaps 112(%rbp), %ymm12 -; CHECK-NEXT: vmovaps 80(%rbp), %ymm13 -; CHECK-NEXT: vmovaps 48(%rbp), %ymm14 -; CHECK-NEXT: vmovaps 16(%rbp), %ymm15 -; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7] -; CHECK-NEXT: vmovaps %ymm9, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: # implicit-def: $ymm0 -; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 -; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0] -; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7] +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: vmovaps %ymm4, %ymm10 +; CHECK-NEXT: vmovaps %ymm3, %ymm9 +; CHECK-NEXT: vmovaps %ymm1, %ymm8 +; CHECK-NEXT: vmovaps %ymm0, %ymm4 +; CHECK-NEXT: vmovaps 240(%rbp), %ymm1 +; CHECK-NEXT: vmovaps 208(%rbp), %ymm3 +; CHECK-NEXT: vmovaps 176(%rbp), %ymm0 +; CHECK-NEXT: vmovaps 144(%rbp), %ymm0 +; CHECK-NEXT: vmovaps 112(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 80(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 48(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 16(%rbp), %ymm11 +; CHECK-NEXT: vpblendd {{.*#+}} ymm4 = ymm6[0,1,2,3,4,5],ymm2[6,7] +; CHECK-NEXT: vmovaps %xmm3, %xmm8 +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vinserti128 $1, %xmm8, %ymm2, %ymm2 +; CHECK-NEXT: vpalignr {{.*#+}} ymm0 = ymm4[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm4[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,0] +; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5],ymm0[6,7] ; CHECK-NEXT: vmovaps %xmm7, %xmm2 -; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; CHECK-NEXT: # implicit-def: $ymm9 -; CHECK-NEXT: vmovaps %xmm2, %xmm9 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3] -; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7] -; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7] -; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3] -; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] -; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm8[0,1,2,3,4,5],ymm5[6,7] -; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm7 -; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero -; CHECK-NEXT: # implicit-def: $ymm8 -; CHECK-NEXT: vmovaps %xmm7, %xmm8 -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1] -; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm5, %ymm1 -; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm9, %ymm3 +; CHECK-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm4, %xmm2 +; CHECK-NEXT: vpalignr {{.*#+}} ymm3 = ymm3[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm3[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,0,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm3 = ymm2[0,1,2,3],ymm3[4,5,6,7] +; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm7[0,1],ymm1[2,3],ymm7[4,5,6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,1,3] +; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm5[0,1,0,1,4,5,4,5] +; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7] +; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm2 +; CHECK-NEXT: vmovq {{.*#+}} xmm4 = xmm2[0],zero +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm4, %xmm2 +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm2[0,1],ymm6[0,1] ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll index 2f63ac311f2eb..f341a9a6c6741 100644 --- a/llvm/test/CodeGen/X86/pr34653.ll +++ b/llvm/test/CodeGen/X86/pr34653.ll @@ -12,52 +12,46 @@ define void @pr34653() { ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-512, %rsp # imm = 0xFE00 -; CHECK-NEXT: subq $1536, %rsp # imm = 0x600 -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; CHECK-NEXT: subq $1024, %rsp # imm = 0x400 +; CHECK-NEXT: movq %rsp, %rdi ; CHECK-NEXT: callq test ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm19 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm20 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm21 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm22 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm23 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm24 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm25 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm26 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm27 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm28 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm29 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm30 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm31 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr39733.ll b/llvm/test/CodeGen/X86/pr39733.ll index 31bd5b71d0a6e..4a940806c9b70 100644 --- a/llvm/test/CodeGen/X86/pr39733.ll +++ b/llvm/test/CodeGen/X86/pr39733.ll @@ -17,13 +17,13 @@ define void @test55() { ; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 -; CHECK-NEXT: # implicit-def: $ymm2 -; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 +; CHECK-NEXT: vpmovsxwd %xmm1, %xmm2 +; CHECK-NEXT: # implicit-def: $ymm0 +; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; CHECK-NEXT: vpmovsxwd %xmm1, %xmm1 +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK-NEXT: vmovdqa %ymm0, (%rsp) ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr42452.ll b/llvm/test/CodeGen/X86/pr42452.ll index d3a1dad42bd39..14a6f3d133007 100644 --- a/llvm/test/CodeGen/X86/pr42452.ll +++ b/llvm/test/CodeGen/X86/pr42452.ll @@ -6,12 +6,12 @@ define void @foo(i1 %c, <2 x i64> %x) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $dil killed $dil killed $edi +; CHECK-NEXT: movb %dil, %al +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movq %xmm0, %rax ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; CHECK-NEXT: movq %xmm0, %rcx -; CHECK-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload diff --git a/llvm/test/CodeGen/X86/pr44749.ll b/llvm/test/CodeGen/X86/pr44749.ll index 1012d8c723b13..daf7e25884a46 100644 --- a/llvm/test/CodeGen/X86/pr44749.ll +++ b/llvm/test/CodeGen/X86/pr44749.ll @@ -4,33 +4,29 @@ define i32 @a() { ; CHECK-LABEL: a: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax -; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _b ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 ; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax ; CHECK-NEXT: subq $-1, %rax -; CHECK-NEXT: setne %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: setne %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: leaq {{.*}}(%rip), %rax ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: setae %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: leaq {{.*}}(%rip), %rax ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: cvttsd2si %xmm0, %ecx -; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: cvttsd2si %xmm0, %eax +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq entry: %call = call i32 (...) @b() diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll index 083aa780a07c2..c2d9317a95ea6 100755 --- a/llvm/test/CodeGen/X86/pr47000.ll +++ b/llvm/test/CodeGen/X86/pr47000.ll @@ -12,124 +12,124 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind { ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $124, %esp -; CHECK-NEXT: movl 144(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movw 176(%esp), %dx -; CHECK-NEXT: movw 172(%esp), %si -; CHECK-NEXT: movw 168(%esp), %di -; CHECK-NEXT: movw 164(%esp), %bx -; CHECK-NEXT: movw 160(%esp), %bp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movw 156(%esp), %ax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 152(%esp), %ax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %cx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 148(%esp), %ax -; CHECK-NEXT: movw %ax, 112(%esp) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 114(%esp) +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 116(%esp) -; CHECK-NEXT: movw %bp, 118(%esp) -; CHECK-NEXT: movw %dx, 110(%esp) -; CHECK-NEXT: movw %si, 108(%esp) -; CHECK-NEXT: movw %di, 106(%esp) -; CHECK-NEXT: movw %bx, 104(%esp) -; CHECK-NEXT: movzwl 118(%esp), %edx -; CHECK-NEXT: movzwl 116(%esp), %esi -; CHECK-NEXT: movzwl 114(%esp), %edi -; CHECK-NEXT: movzwl 112(%esp), %ebx -; CHECK-NEXT: movzwl 110(%esp), %ebp -; CHECK-NEXT: movzwl 108(%esp), %eax +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 106(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 104(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl %ebx, (%eax) -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: calll __gnu_h2f_ieee ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movw %ax, %si +; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movw %ax, %di +; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movw %ax, 6(%ecx) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 4(%ecx) ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload -; CHECK-NEXT: movw %dx, 2(%ecx) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload -; CHECK-NEXT: movw %si, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movw %ax, %bx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movw %bx, 6(%ecx) +; CHECK-NEXT: movw %di, 4(%ecx) +; CHECK-NEXT: movw %si, 2(%ecx) +; CHECK-NEXT: movw %dx, (%ecx) ; CHECK-NEXT: addl $124, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir index 2821f00940ecf..f6b4536cbbc4b 100644 --- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir +++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir @@ -4,7 +4,7 @@ # Bug 41973. Make sure %12 is detected as live out of %bb.0, even # though the use is allocated before the def block %bb.3. Previously # mayLiveOut only recorded on defs, and would not find the virtual -# register use if it had already been replace with a physical +# register use if it had already been replaced with a physical # register. --- @@ -21,11 +21,11 @@ body: | ; CHECK: successors: ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) - ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit - ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0) + ; CHECK: $rcx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) + ; CHECK: renamable $eax = MOV32r0 implicit-def dead $eflags + ; CHECK: renamable $rax = SUBREG_TO_REG 0, killed renamable $eax, %subreg.sub_32bit + ; CHECK: MOV64mi32 killed renamable $rcx, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rax :: (store 8 into %stack.0) ; CHECK: bb.3: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) @@ -46,7 +46,6 @@ body: | bb.1: successors: - bb.2: %0:gr64 = COPY %12 %10:gr32 = MOV32r0 implicit-def $eflags diff --git a/llvm/test/CodeGen/X86/stack-protector-msvc.ll b/llvm/test/CodeGen/X86/stack-protector-msvc.ll index c1f79f9db2f6f..7679bb4f77caf 100644 --- a/llvm/test/CodeGen/X86/stack-protector-msvc.ll +++ b/llvm/test/CodeGen/X86/stack-protector-msvc.ll @@ -48,9 +48,8 @@ return: ; preds = %entry ; MSVC-X86-O0: xorl %esp, %[[REG1]] ; MSVC-X86-O0: movl %[[REG1]], [[SLOT:[0-9]*]](%esp) ; MSVC-X86-O0: calll _strcpy -; MSVC-X86-O0: movl [[SLOT]](%esp), %[[REG1:[^ ]*]] -; MSVC-X86-O0: xorl %esp, %[[REG1]] -; MSVC-X86-O0: movl %[[REG1]], %ecx +; MSVC-X86-O0: movl [[SLOT]](%esp), %ecx +; MSVC-X86-O0: xorl %esp, %ecx ; MSVC-X86-O0: calll @__security_check_cookie@4 ; MSVC-X86-O0: retl @@ -59,9 +58,8 @@ return: ; preds = %entry ; MSVC-X64-O0: xorq %rsp, %[[REG1]] ; MSVC-X64-O0: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp) ; MSVC-X64-O0: callq strcpy -; MSVC-X64-O0: movq [[SLOT]](%rsp), %[[REG1:[^ ]*]] -; MSVC-X64-O0: xorq %rsp, %[[REG1]] -; MSVC-X64-O0: movq %[[REG1]], %rcx +; MSVC-X64-O0: movq [[SLOT]](%rsp), %rcx +; MSVC-X64-O0: xorq %rsp, %rcx ; MSVC-X64-O0: callq __security_check_cookie ; MSVC-X64-O0: retq diff --git a/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll b/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll index f5647c341e733..ccc4b34ae930f 100644 --- a/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll +++ b/llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll @@ -14,7 +14,7 @@ define dso_local i32 @main(i32 %argc, i8** %argv, ...) #0 { ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: subq $336, %rsp ## imm = 0x150 +; CHECK-NEXT: subq $320, %rsp ## imm = 0x140 ; CHECK-NEXT: movq ___security_cookie@{{.*}}(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq %rax, -8(%rbp) @@ -25,10 +25,9 @@ define dso_local i32 @main(i32 %argc, i8** %argv, ...) #0 { ; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ; CHECK-NEXT: callq _printf ; CHECK-NEXT: movq -8(%rbp), %rcx -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: callq ___security_check_cookie ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: addq $336, %rsp ## imm = 0x150 +; CHECK-NEXT: addq $320, %rsp ## imm = 0x140 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll index 4934419055acd..11312f08edfa3 100644 --- a/llvm/test/CodeGen/X86/swift-return.ll +++ b/llvm/test/CodeGen/X86/swift-return.ll @@ -79,16 +79,15 @@ define i32 @test2(i32 %key) #0 { ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, %rax ; CHECK-O0-NEXT: callq gen2 -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl (%rsp), %esi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi +; CHECK-O0-NEXT: movl (%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi -; CHECK-O0-NEXT: addl %edi, %esi -; CHECK-O0-NEXT: addl %edx, %esi -; CHECK-O0-NEXT: addl %ecx, %esi -; CHECK-O0-NEXT: addl %eax, %esi -; CHECK-O0-NEXT: movl %esi, %eax +; CHECK-O0-NEXT: addl %edi, %eax +; CHECK-O0-NEXT: addl %esi, %eax +; CHECK-O0-NEXT: addl %edx, %eax +; CHECK-O0-NEXT: addl %ecx, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -418,10 +417,10 @@ define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { ; ; CHECK-O0-LABEL: gen7: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movl %edi, %eax -; CHECK-O0-NEXT: movl %edi, %edx -; CHECK-O0-NEXT: movl %edi, %ecx ; CHECK-O0-NEXT: movl %edi, %r8d +; CHECK-O0-NEXT: movl %r8d, %eax +; CHECK-O0-NEXT: movl %r8d, %edx +; CHECK-O0-NEXT: movl %r8d, %ecx ; CHECK-O0-NEXT: retq %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 @@ -441,10 +440,10 @@ define swiftcc { i64, i64, i64, i64 } @gen8(i64 %key) { ; ; CHECK-O0-LABEL: gen8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq %rdi, %rax -; CHECK-O0-NEXT: movq %rdi, %rdx -; CHECK-O0-NEXT: movq %rdi, %rcx ; CHECK-O0-NEXT: movq %rdi, %r8 +; CHECK-O0-NEXT: movq %r8, %rax +; CHECK-O0-NEXT: movq %r8, %rdx +; CHECK-O0-NEXT: movq %r8, %rcx ; CHECK-O0-NEXT: retq %v0 = insertvalue { i64, i64, i64, i64 } undef, i64 %key, 0 %v1 = insertvalue { i64, i64, i64, i64 } %v0, i64 %key, 1 @@ -464,11 +463,10 @@ define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { ; ; CHECK-O0-LABEL: gen9: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $dil killed $dil killed $edi -; CHECK-O0-NEXT: movb %dil, %al -; CHECK-O0-NEXT: movb %dil, %dl -; CHECK-O0-NEXT: movb %dil, %cl ; CHECK-O0-NEXT: movb %dil, %r8b +; CHECK-O0-NEXT: movb %r8b, %al +; CHECK-O0-NEXT: movb %r8b, %dl +; CHECK-O0-NEXT: movb %r8b, %cl ; CHECK-O0-NEXT: retq %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 @@ -490,17 +488,14 @@ define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(dou ; ; CHECK-O0-LABEL: gen10: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload -; CHECK-O0-NEXT: # xmm1 = mem[0],zero -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Reload -; CHECK-O0-NEXT: # xmm2 = mem[0],zero -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Reload -; CHECK-O0-NEXT: # xmm3 = mem[0],zero -; CHECK-O0-NEXT: movq %rdi, %rax -; CHECK-O0-NEXT: movq %rdi, %rdx -; CHECK-O0-NEXT: movq %rdi, %rcx ; CHECK-O0-NEXT: movq %rdi, %r8 +; CHECK-O0-NEXT: movaps %xmm0, %xmm3 +; CHECK-O0-NEXT: movaps %xmm3, %xmm0 +; CHECK-O0-NEXT: movaps %xmm3, %xmm1 +; CHECK-O0-NEXT: movaps %xmm3, %xmm2 +; CHECK-O0-NEXT: movq %r8, %rax +; CHECK-O0-NEXT: movq %r8, %rdx +; CHECK-O0-NEXT: movq %r8, %rcx ; CHECK-O0-NEXT: retq %v0 = insertvalue { double, double, double, double, i64, i64, i64, i64 } undef, double %keyd, 0 %v1 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v0, double %keyd, 1 @@ -569,13 +564,15 @@ define swiftcc { <4 x float>, float } @test12() #0 { ; ; CHECK-O0-LABEL: test12: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushq %rax -; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: subq $24, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-O0-NEXT: callq gen12 -; CHECK-O0-NEXT: addps %xmm1, %xmm0 -; CHECK-O0-NEXT: addps %xmm2, %xmm0 +; CHECK-O0-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill ; CHECK-O0-NEXT: movaps %xmm3, %xmm1 -; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: movaps (%rsp), %xmm3 # 16-byte Reload +; CHECK-O0-NEXT: addps %xmm3, %xmm0 +; CHECK-O0-NEXT: addps %xmm2, %xmm0 +; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 1afae31b2b8d2..8877cd4108596 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -18,8 +18,9 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-O0-LABEL: foo: ; CHECK-O0: movl $16 ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) ; CHECK-O0: movq %{{.*}}, %r12 +; CHECK-O0: movb $1, 8(%rax) + entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -121,19 +122,17 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { ; CHECK-APPLE: ret ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: cmpl $0 ; spill to stack ; CHECK-O0: movq %r12, {{.*}}(%rsp) +; CHECK-O0: cmpl $0 ; CHECK-O0: je ; CHECK-O0: movl $16, ; CHECK-O0: malloc -; CHECK-O0: movq %rax, [[ID:%[a-z]+]] +; CHECK-O0: movq %rax, %r12 ; CHECK-O0-DAG: movb $1, 8(%rax) -; CHECK-O0-DAG: movq [[ID]], %r12 ; CHECK-O0: ret ; reload from stack -; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq {{.*}}(%rsp), %r12 ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -177,8 +176,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0: movb $1, 8([[ID]]) ; CHECK-O0: jbe ; reload from stack -; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z0-9]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq {{.*}}(%rsp), %r12 ; CHECK-O0: ret entry: br label %bb_loop @@ -218,16 +216,15 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK-APPLE-NOT: x19 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: movl $16, ; spill sret to stack ; CHECK-O0: movq %rdi, -; CHECK-O0: movq {{.*}}, %rdi +; CHECK-O0: movl $16, ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) -; CHECK-O0: movl %{{.*}}, 4(%{{.*}}) -; CHECK-O0: movq %{{.*}}, %r12 ; reload sret from stack ; CHECK-O0: movq {{.*}}(%rsp), %rax +; CHECK-O0: movq %{{.*}}, %r12 +; CHECK-O0: movb $1, 8(%rcx) +; CHECK-O0: movl %{{.*}}, 4(%{{.*}}) ; CHECK-O0: ret entry: %call = call i8* @malloc(i64 16) @@ -256,8 +253,8 @@ define float @caller3(i8* %error_ref) { ; CHECK-O0-LABEL: caller3: ; CHECK-O0: xorl ; CHECK-O0: movl {{.*}}, %r12d +; CHECK-O0: leaq {{.*}}, %rdi ; CHECK-O0: movl $1, %esi -; CHECK-O0: movq {{.*}}, %rdi ; CHECK-O0: callq {{.*}}foo_sret ; CHECK-O0: movq %r12, ; CHECK-O0: cmpq $0 @@ -387,8 +384,9 @@ define swiftcc float @foo_swiftcc(%swift_error** swifterror %error_ptr_ref) { ; CHECK-O0-LABEL: foo_swiftcc: ; CHECK-O0: movl $16 ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) ; CHECK-O0: movq %{{.*}}, %r12 +; CHECK-O0: movb $1, 8(%rax) + entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -435,19 +433,17 @@ define swiftcc float @conditionally_forward_swifterror(%swift_error** swifterror ; CHECK-O0-LABEL: conditionally_forward_swifterror: ; CHECK-O0: pushq [[REG1:%[a-z0-9]+]] -; CHECK-O0: cmpl $0, %edi ; CHECK-O0-DAG: movq %r12, (%rsp) +; CHECK-O0: cmpl $0, %edi ; CHECK-O0: je -; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq (%rsp), %r12 ; CHECK-O0: callq _moo ; CHECK-O0: popq [[REG1]] ; CHECK-O0: retq -; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]] +; CHECK-O0: movq (%rsp), %r12 ; CHECK-O0: xorps %xmm0, %xmm0 -; CHECK-O0: movq [[REG]], %r12 ; CHECK-O0: popq [[REG1]] ; CHECK-O0: retq entry: @@ -745,10 +741,9 @@ a: ; CHECK-O0-LABEL: testAssign2 ; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: jmp -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: retq +; CHECK-O0: movq [[SLOT]], %r12 +; CHECK-O0-NEXT: movq %r12, %rax +; CHECK-O0-NEXT: retq ; CHECK-APPLE-LABEL: testAssign2 ; CHECK-APPLE: movq %r12, %rax @@ -765,11 +760,10 @@ a: ; CHECK-O0-LABEL: testAssign3 ; CHECK-O0: callq _foo2 ; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: addq $24, %rsp -; CHECK-O0: retq +; CHECK-O0: movq [[SLOT]], %r12 +; CHECK-O0-NEXT: movq %r12, %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq ; CHECK-APPLE-LABEL: testAssign3 ; CHECK-APPLE: callq _foo2 @@ -792,10 +786,10 @@ a: ; CHECK-O0: xorl %eax, %eax ; CHECK-O0: ## kill: def $rax killed $eax ; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: retq +; CHECK-O0: movq [[SLOT]], %r12 +; CHECK-O0-NEXT: movq %r12, %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq ; CHECK-APPLE-LABEL: testAssign4 ; CHECK-APPLE: callq _foo2 diff --git a/llvm/test/CodeGen/X86/volatile.ll b/llvm/test/CodeGen/X86/volatile.ll index d6ed45a1909fa..a4f52dd4ca0ae 100644 --- a/llvm/test/CodeGen/X86/volatile.ll +++ b/llvm/test/CodeGen/X86/volatile.ll @@ -5,23 +5,14 @@ @x = external global double define void @foo() nounwind { -; OPT-LABEL: foo: -; OPT: # %bb.0: -; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; OPT-NEXT: xorps %xmm0, %xmm0 -; OPT-NEXT: movsd %xmm0, x -; OPT-NEXT: movsd %xmm0, x -; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; OPT-NEXT: retl -; -; NOOPT-LABEL: foo: -; NOOPT: # %bb.0: -; NOOPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; NOOPT-NEXT: xorps %xmm1, %xmm1 -; NOOPT-NEXT: movsd %xmm1, x -; NOOPT-NEXT: movsd %xmm1, x -; NOOPT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; NOOPT-NEXT: retl +; ALL-LABEL: foo: +; ALL: # %bb.0: +; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: xorps %xmm0, %xmm0 +; ALL-NEXT: movsd %xmm0, x +; ALL-NEXT: movsd %xmm0, x +; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: retl %a = load volatile double, double* @x store volatile double 0.0, double* @x store volatile double 0.0, double* @x diff --git a/llvm/test/CodeGen/X86/win64_eh.ll b/llvm/test/CodeGen/X86/win64_eh.ll index caadea4fe2e4b..ea795906a94c4 100644 --- a/llvm/test/CodeGen/X86/win64_eh.ll +++ b/llvm/test/CodeGen/X86/win64_eh.ll @@ -82,11 +82,11 @@ entry: } ; WIN64-LABEL: foo3: ; WIN64: .seh_proc foo3 -; NORM: subq $24, %rsp -; ATOM: leaq -24(%rsp), %rsp -; WIN64: .seh_stackalloc 24 +; NORM: subq $16, %rsp +; ATOM: leaq -16(%rsp), %rsp +; WIN64: .seh_stackalloc 16 ; WIN64: .seh_endprologue -; WIN64: addq $24, %rsp +; WIN64: addq $16, %rsp ; WIN64: ret ; WIN64: .seh_endproc diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll index af2d965772130..be325e2f0edcf 100644 --- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll @@ -42,9 +42,9 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %eco ; CHECK0-LABEL: test_isr_ecode ; CHECK0: pushl %ecx ; CHECK0: pushl %eax - ; CHECK0: movl 8(%esp), %eax - ; CHECK0: leal 12(%esp), %ecx - ; CHECK0: movl 8(%ecx), %ecx + ; CHECK0: movl 8(%esp), %ecx + ; CHECK0: leal 12(%esp), %eax + ; CHECK0: movl 8(%eax), %eax ; CHECK0: popl %eax ; CHECK0: popl %ecx ; CHECK0: addl $4, %esp diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll index 866108e3cd3cc..548f7100b028f 100644 --- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll @@ -43,9 +43,9 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %eco ; CHECK0: pushq %rax ; CHECK0: pushq %rax ; CHECK0: pushq %rcx - ; CHECK0: movq 24(%rsp), %rax - ; CHECK0: leaq 32(%rsp), %rcx - ; CHECK0: movq 16(%rcx), %rcx + ; CHECK0: movq 24(%rsp), %rcx + ; CHECK0: leaq 32(%rsp), %rax + ; CHECK0: movq 16(%rax), %rax ; CHECK0: popq %rcx ; CHECK0: popq %rax ; CHECK0: addq $16, %rsp diff --git a/llvm/test/DebugInfo/AArch64/frameindices.ll b/llvm/test/DebugInfo/AArch64/frameindices.ll index a74e6bad30588..b53fbf6fd0883 100644 --- a/llvm/test/DebugInfo/AArch64/frameindices.ll +++ b/llvm/test/DebugInfo/AArch64/frameindices.ll @@ -5,7 +5,7 @@ ; CHECK: DW_TAG_inlined_subroutine ; CHECK: "_Z3f111A" ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_fbreg -54, DW_OP_piece 0x7) +; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_breg31 WSP+42, DW_OP_piece 0x7) ; CHECK: DW_AT_abstract_origin {{.*}} "p1" ; ; long a; diff --git a/llvm/test/DebugInfo/AArch64/prologue_end.ll b/llvm/test/DebugInfo/AArch64/prologue_end.ll index bafbcf752aa44..660ce3aa43ab0 100644 --- a/llvm/test/DebugInfo/AArch64/prologue_end.ll +++ b/llvm/test/DebugInfo/AArch64/prologue_end.ll @@ -9,9 +9,8 @@ define void @prologue_end_test() nounwind uwtable !dbg !4 { ; CHECK: prologue_end_test: ; CHECK: .cfi_startproc - ; CHECK: sub sp, sp ; CHECK: stp x29, x30 - ; CHECK: add x29, sp + ; CHECK: mov x29, sp ; CHECK: .loc 1 3 3 prologue_end ; CHECK: bl _func ; CHECK: bl _func diff --git a/llvm/test/DebugInfo/ARM/prologue_end.ll b/llvm/test/DebugInfo/ARM/prologue_end.ll index 2c4922d1a8a43..5b50448ad80ff 100644 --- a/llvm/test/DebugInfo/ARM/prologue_end.ll +++ b/llvm/test/DebugInfo/ARM/prologue_end.ll @@ -11,7 +11,6 @@ define void @prologue_end_test() nounwind uwtable !dbg !4 { ; CHECK: prologue_end_test: ; CHECK: push {r7, lr} ; CHECK: {{mov r7, sp|add r7, sp}} - ; CHECK: sub sp ; CHECK: .loc 1 3 3 prologue_end ; CHECK: bl {{_func|Ltmp}} ; CHECK: bl {{_func|Ltmp}} diff --git a/llvm/test/DebugInfo/Mips/delay-slot.ll b/llvm/test/DebugInfo/Mips/delay-slot.ll index 8f444bce30fd1..07c2caa8999c3 100644 --- a/llvm/test/DebugInfo/Mips/delay-slot.ll +++ b/llvm/test/DebugInfo/Mips/delay-slot.ll @@ -14,10 +14,10 @@ ; CHECK: ------------------ ------ ------ ------ --- ------------- ------------- ; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt ; CHECK: 0x0000000000000004 2 0 1 0 0 is_stmt prologue_end -; CHECK: 0x0000000000000024 3 0 1 0 0 is_stmt -; CHECK: 0x0000000000000034 4 0 1 0 0 is_stmt +; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt +; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt ; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt -; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence +; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" diff --git a/llvm/test/DebugInfo/Mips/prologue_end.ll b/llvm/test/DebugInfo/Mips/prologue_end.ll index 7886b9b0485f4..de907cdc651f8 100644 --- a/llvm/test/DebugInfo/Mips/prologue_end.ll +++ b/llvm/test/DebugInfo/Mips/prologue_end.ll @@ -30,7 +30,7 @@ entry: ; PIC: addiu $[[R0]], $[[R0]], %lo(_gp_disp) ; PIC: addiu $sp, $sp, -{{[0-9]+}} ; PIC: sw $ra, {{[0-9]+}}($sp) -; PIC: addu $[[R1:[0-9]+]], $[[R0]], $25 +; PIC: addu $[[R1:[0-9]+|gp]], $[[R0]], $25 ; PIC: .loc 1 2 3 prologue_end ; PIC: lw $[[R2:[0-9]+]], %got($.str)($[[R1]]) @@ -40,7 +40,7 @@ entry: ; PIC-FP: sw $ra, {{[0-9]+}}($sp) ; PIC-FP: sw $fp, {{[0-9]+}}($sp) ; PIC-FP: move $fp, $sp -; PIC-FP: addu $[[R1:[0-9]+]], $[[R0]], $25 +; PIC-FP: addu $[[R1:[0-9]+|gp]], $[[R0]], $25 ; PIC-FP: .loc 1 2 3 prologue_end ; PIC-FP: lw $[[R2:[0-9]+]], %got($.str)($[[R1]]) diff --git a/llvm/test/DebugInfo/X86/dbg-declare-arg.ll b/llvm/test/DebugInfo/X86/dbg-declare-arg.ll index 1fa53462b8409..b2b88cb8b1b82 100644 --- a/llvm/test/DebugInfo/X86/dbg-declare-arg.ll +++ b/llvm/test/DebugInfo/X86/dbg-declare-arg.ll @@ -20,7 +20,7 @@ target triple = "x86_64-apple-macosx10.6.7" ; CHECK: DW_AT_name {{.*}}"j" ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] ( -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+16, DW_OP_deref) +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+8, DW_OP_deref) ; CHECK-NEXT: DW_AT_name {{.*}}"my_a" %class.A = type { i32, i32, i32, i32 } diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll index e8d8bd86f2fee..8174cabe29327 100644 --- a/llvm/test/DebugInfo/X86/fission-ranges.ll +++ b/llvm/test/DebugInfo/X86/fission-ranges.ll @@ -10,11 +10,11 @@ ; LiveDebugValues should produce DBG_VALUEs for variable "b" in successive ; blocks once we recognize that it is spilled. ; CHECK-MIR: ![[BDIVAR:[0-9]+]] = !DILocalVariable(name: "b" -; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus) +; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus) ; CHECK-MIR-LABEL: bb.6.for.inc13: -; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus) +; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus) ; CHECK-MIR-LABEL: bb.7.for.inc16: -; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus) +; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus) ; CHECK: .debug_info contents: @@ -46,20 +46,20 @@ ; CHECK: [[A]]: ; CHECK-NEXT: DW_LLE_startx_length (0x00000002, 0x0000000f): DW_OP_consts +0, DW_OP_stack_value -; CHECK-NEXT: DW_LLE_startx_length (0x00000003, 0x0000000f): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x00000012): DW_OP_breg7 RSP-8 +; CHECK-NEXT: DW_LLE_startx_length (0x00000003, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x00000012): DW_OP_breg7 RSP-4 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[E]]: -; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x00000009): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x00000062): DW_OP_breg7 RSP-44 +; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x0000005a): DW_OP_breg7 RSP-36 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[B]]: -; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000f): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000008, 0x00000042): DW_OP_breg7 RSP-32 +; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x00000008, 0x00000042): DW_OP_breg7 RSP-24 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[D]]: -; CHECK-NEXT: DW_LLE_startx_length (0x00000009, 0x0000000f): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x0000000a, 0x0000002a): DW_OP_breg7 RSP-20 +; CHECK-NEXT: DW_LLE_startx_length (0x00000009, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x0000000a, 0x0000002a): DW_OP_breg7 RSP-12 ; CHECK-NEXT: DW_LLE_end_of_list () ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo) @@ -81,7 +81,7 @@ ; V5RNGLISTS-NOT: DW_TAG ; V5RNGLISTS: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) ; V5RNGLISTS: .debug_rnglists contents: -; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000019, format = DWARF32, version = 0x0005, +; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000015, format = DWARF32, version = 0x0005, ; V5RNGLISTS-SAME: addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001 ; V5RNGLISTS-NEXT: offsets: [ ; V5RNGLISTS-NEXT: => 0x00000010 @@ -96,7 +96,7 @@ ; extern int c; ; static void foo (int p) ; { -; int a, b; +; int a, b; ; unsigned int d, e; ; for (a = 0; a < 30; a++) @@ -104,12 +104,12 @@ ; for (b = 0; b < 30; b++) ; for (e = 0; e < 30; e++) ; { -; int *w = &c; -; *w &= p; +; int *w = &c; +; *w &= p; ; } ; } -; void +; void ; bar () ; { ; foo (1); diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll index 1b49dc554f7ef..e357d3c9b02e5 100644 --- a/llvm/test/DebugInfo/X86/op_deref.ll +++ b/llvm/test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,12 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref) ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +19,8 @@ ; Check the DEBUG_VALUE comments for good measure. ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] -; ASM-CHECK: DW_OP_breg2 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rax+0] +; ASM-CHECK: DW_OP_breg6 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref) diff --git a/llvm/test/DebugInfo/X86/parameters.ll b/llvm/test/DebugInfo/X86/parameters.ll index 5f4edd5b963de..9b139eaffffc6 100644 --- a/llvm/test/DebugInfo/X86/parameters.ll +++ b/llvm/test/DebugInfo/X86/parameters.ll @@ -37,8 +37,8 @@ ; CHECK: DW_AT_location{{.*}}(DW_OP_fbreg +23) ; CHECK: DW_TAG_formal_parameter ; CHECK: DW_AT_location{{.*}}( -; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref -; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref) +; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref +; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref) ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name{{.*}} = "g" diff --git a/llvm/test/DebugInfo/X86/pieces-1.ll b/llvm/test/DebugInfo/X86/pieces-1.ll index c333f2b33f259..f614f78c65814 100644 --- a/llvm/test/DebugInfo/X86/pieces-1.ll +++ b/llvm/test/DebugInfo/X86/pieces-1.ll @@ -16,7 +16,7 @@ ; CHECK: .debug_loc contents: ; -; CHECK: (0x0000000000000000, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4 +; CHECK: (0x0000000000000006, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg0 RAX, DW_OP_piece 0x4 ; 0x0000000000000006 - 0x0000000000000008: rbp-8, piece 0x8, rax, piece 0x4 ) target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/X86/prologue-stack.ll b/llvm/test/DebugInfo/X86/prologue-stack.ll index 6072543861d35..299f10f18b48e 100644 --- a/llvm/test/DebugInfo/X86/prologue-stack.ll +++ b/llvm/test/DebugInfo/X86/prologue-stack.ll @@ -6,7 +6,7 @@ ; return 0; ; } -define i32 @isel_line_test2() nounwind uwtable !dbg !5 { +define i32 @isel_line_test2(i32 %arg) nounwind uwtable !dbg !5 { ; The stack adjustment should be part of the prologue. ; CHECK: isel_line_test2: ; CHECK: {{subq|leaq}} {{.*}}, %rsp @@ -14,8 +14,9 @@ define i32 @isel_line_test2() nounwind uwtable !dbg !5 { ; CHECK: movl $400, %edi ; CHECK: callq callme entry: + ; %arg should get spilled here, so we need to setup a stackframe %call = call i32 @callme(i32 400), !dbg !10 - ret i32 0, !dbg !12 + ret i32 %arg, !dbg !12 } declare i32 @callme(i32) diff --git a/llvm/test/DebugInfo/X86/reference-argument.ll b/llvm/test/DebugInfo/X86/reference-argument.ll index 4bdb44a796ed4..3beb16e2ff145 100644 --- a/llvm/test/DebugInfo/X86/reference-argument.ll +++ b/llvm/test/DebugInfo/X86/reference-argument.ll @@ -13,7 +13,7 @@ ; CHECK-NOT: DW_TAG_subprogram ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_location -; CHECK-NEXT: DW_OP_breg4 RSI+0 +; CHECK-NEXT: DW_OP_breg5 RDI+0 ; CHECK-NEXT: DW_AT_name {{.*}} "v" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll b/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll index 82c852034aebc..fb0c5779ca8bb 100644 --- a/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll +++ b/llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s | FileCheck %s -; RUN: llc -O0 < %s | FileCheck %s +; RUN: llc < %s | FileCheck -check-prefixes=CHECK,OPT %s +; RUN: llc -O0 < %s | FileCheck -check-prefixes=CHECK,OPTNONE %s ; Make sure we insert DW_OP_deref when spilling indirect DBG_VALUE instructions. @@ -21,10 +21,18 @@ ; } ; CHECK-LABEL: _Z10get_stringv: -; CHECK: #DEBUG_VALUE: get_string:result <- [$rdi+0] -; CHECK: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill -; CHECK: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0] -; CHECK: callq _ZN6stringC1Ei + +; OPT: #DEBUG_VALUE: get_string:result <- [$rdi+0] +; OPT: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill +; OPT: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0] +; OPT: callq _ZN6stringC1Ei + +; OPTNONE: #DEBUG_VALUE: get_string:result <- [DW_OP_deref] [$rsp+0] +; OPTNONE: movq %rdi, %rax +; OPTNONE: movq %rax, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill +; OPTNONE: #DEBUG_VALUE: get_string:result <- [$rdi+0] +; OPTNONE: callq _ZN6stringC1Ei + ; CHECK: #APP ; CHECK: #NO_APP diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll index c87b57c524db4..f245cbaa627cb 100644 --- a/llvm/test/DebugInfo/X86/sret.ll +++ b/llvm/test/DebugInfo/X86/sret.ll @@ -3,16 +3,17 @@ ; Based on the debuginfo-tests/sret.cpp code. -; CHECK-DWO: DW_AT_GNU_dwo_id (0x7e62530711b94622) -; CHECK-DWO: DW_AT_GNU_dwo_id (0x7e62530711b94622) +; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e) +; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e) -; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck %s -; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck %s +; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,FASTISEL %s +; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,SDAG %s ; CHECK: _ZN1B9AInstanceEv ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location (0x00000000 -; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0 -; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-24, DW_OP_deref) +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) ; CHECK-NEXT: DW_AT_name {{.*}}"a" %class.A = type { i32 (...)**, i32 } diff --git a/llvm/test/DebugInfo/X86/subreg.ll b/llvm/test/DebugInfo/X86/subreg.ll index 1a0feb95e931f..37f3181d87980 100644 --- a/llvm/test/DebugInfo/X86/subreg.ll +++ b/llvm/test/DebugInfo/X86/subreg.ll @@ -3,7 +3,7 @@ ; We are testing that a value in a 16 bit register gets reported as ; being in its superregister. -; CHECK: .byte 85 # super-register DW_OP_reg5 +; CHECK: .byte 80 # super-register DW_OP_reg0 ; No need to a piece at offset 0. ; CHECK-NOT: DW_OP_piece ; CHECK-NOT: DW_OP_bit_piece From a66fca44ac926b25820f0e9344db1947d966291b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 28 Sep 2020 13:42:17 -0400 Subject: [PATCH 163/544] RegAllocFast: Add extra DBG_VALUE for live out spills This allows LiveDebugValues to insert the proper DBG_VALUEs in live out blocks if a spill is inserted before the use of a register. Previously, this would see the register use as the last DBG_VALUE, even though the stack slot should be treated as the live out value. This avoids an lldb test regression when D52010 is re-applied. --- llvm/lib/CodeGen/RegAllocFast.cpp | 19 +- .../fast-regalloc-live-out-debug-values.mir | 222 ++++++++++++++++++ llvm/test/DebugInfo/X86/fission-ranges.ll | 2 +- llvm/test/DebugInfo/X86/op_deref.ll | 3 +- llvm/test/DebugInfo/X86/parameters.ll | 3 +- llvm/test/DebugInfo/X86/sret.ll | 10 +- llvm/test/DebugInfo/X86/subreg.ll | 3 - 7 files changed, 249 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index cfee1a77d6b8c..03411bebf747a 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -255,7 +255,7 @@ namespace { int getStackSpaceFor(Register VirtReg); void spill(MachineBasicBlock::iterator Before, Register VirtReg, - MCPhysReg AssignedReg, bool Kill); + MCPhysReg AssignedReg, bool Kill, bool LiveOut); void reload(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg PhysReg); @@ -384,7 +384,7 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) { /// Insert spill instruction for \p AssignedReg before \p Before. Update /// DBG_VALUEs with \p VirtReg operands with the stack slot. void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, - MCPhysReg AssignedReg, bool Kill) { + MCPhysReg AssignedReg, bool Kill, bool LiveOut) { LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in " << printReg(AssignedReg, TRI)); int FI = getStackSpaceFor(VirtReg); @@ -394,6 +394,8 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); ++NumStores; + MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator(); + // When we spill a virtual register, we will have spill instructions behind // every definition of it, meaning we can switch all the DBG_VALUEs over // to just reference the stack slot. @@ -403,6 +405,17 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); + + if (LiveOut) { + // We need to insert a DBG_VALUE at the end of the block if the spill slot + // is live out, but there is another use of the value after the + // spill. This will allow LiveDebugValues to see the correct live out + // value to propagate to the successors. + MachineInstr *ClonedDV = MBB->getParent()->CloneMachineInstr(NewDV); + MBB->insert(FirstTerm, ClonedDV); + LLVM_DEBUG(dbgs() << "Cloning debug info due to live out spill\n"); + } + // Rewrite unassigned dbg_values to use the stack slot. MachineOperand &MO = DBG->getOperand(0); if (MO.isReg() && MO.getReg() == 0) @@ -868,7 +881,7 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: " << LRI->Reloaded << '\n'); bool Kill = LRI->LastUse == nullptr; - spill(SpillBefore, VirtReg, PhysReg, Kill); + spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut); LRI->LastUse = nullptr; } LRI->LiveOut = false; diff --git a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir new file mode 100644 index 0000000000000..2b39ee1c91317 --- /dev/null +++ b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir @@ -0,0 +1,222 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -start-before=regallocfast -stop-after=livedebugvalues -verify-machineinstrs -o - %s | FileCheck %s +# DBG_VALUEs for %0 should be present in the use blocks + +--- | + define dso_local i32 @foo(i32 %a) #0 !dbg !6 { + entry: + %a.addr = alloca i32, align 4 + %saved_stack = alloca i8*, align 8 + %__vla_expr0 = alloca i64, align 8 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !DIExpression()), !dbg !12 + %0 = load i32, i32* %a.addr, align 4, !dbg !13 + %1 = zext i32 %0 to i64, !dbg !14 + %2 = call i8* @llvm.stacksave(), !dbg !14 + store i8* %2, i8** %saved_stack, align 8, !dbg !14 + %vla = alloca i32, i64 %1, align 16, !dbg !14 + store i64 %1, i64* %__vla_expr0, align 8, !dbg !14 + call void @llvm.dbg.declare(metadata i64* %__vla_expr0, metadata !15, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.declare(metadata i32* %vla, metadata !18, metadata !DIExpression()), !dbg !22 + call void @llvm.dbg.declare(metadata i32* %i, metadata !23, metadata !DIExpression()), !dbg !25 + store i32 0, i32* %i, align 4, !dbg !25 + br label %for.cond, !dbg !26 + + for.cond: ; preds = %for.inc, %entry + %3 = load i32, i32* %i, align 4, !dbg !27 + %4 = load i32, i32* %a.addr, align 4, !dbg !29 + %cmp = icmp slt i32 %3, %4, !dbg !30 + br i1 %cmp, label %for.body, label %for.end, !dbg !31 + + for.body: ; preds = %for.cond + %5 = load i32, i32* %a.addr, align 4, !dbg !32 + %6 = load i32, i32* %i, align 4, !dbg !33 + %sub = sub nsw i32 %5, %6, !dbg !34 + %7 = load i32, i32* %i, align 4, !dbg !35 + %idxprom = sext i32 %7 to i64, !dbg !36 + %arrayidx = getelementptr inbounds i32, i32* %vla, i64 %idxprom, !dbg !36 + store i32 %sub, i32* %arrayidx, align 4, !dbg !37 + br label %for.inc, !dbg !36 + + for.inc: ; preds = %for.body + %8 = load i32, i32* %i, align 4, !dbg !38 + %inc = add nsw i32 %8, 1, !dbg !38 + store i32 %inc, i32* %i, align 4, !dbg !38 + br label %for.cond, !dbg !39, !llvm.loop !40 + + for.end: ; preds = %for.cond + %9 = load i32, i32* %a.addr, align 4, !dbg !42 + %sub1 = sub nsw i32 %9, 1, !dbg !43 + %idxprom2 = sext i32 %sub1 to i64, !dbg !44 + %arrayidx3 = getelementptr inbounds i32, i32* %vla, i64 %idxprom2, !dbg !44 + %10 = load i32, i32* %arrayidx3, align 4, !dbg !44 + %11 = load i8*, i8** %saved_stack, align 8, !dbg !45 + call void @llvm.stackrestore(i8* %11), !dbg !45 + ret i32 %10, !dbg !45 + } + + declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + declare i8* @llvm.stacksave() #2 + declare void @llvm.stackrestore(i8*) #2 + + attributes #0 = { noinline nounwind optnone uwtable } + attributes #1 = { nounwind readnone speculatable willreturn } + attributes #2 = { nounwind } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project.git 954995d0a45729c7935b82258c166524ee87ad3f)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "/home/matt/src/llvm-project/lldb/test/API/lang/c/vla/main.c", directory: "/home/matt/src/llvm-project/build_debug_lldbg") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = distinct !DISubprogram(name: "foo", scope: !7, file: !7, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !7 = !DIFile(filename: "lldb/test/API/lang/c/vla/main.c", directory: "/home/matt/src/llvm-project") + !8 = !DISubroutineType(types: !9) + !9 = !{!10, !10} + !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !11 = !DILocalVariable(name: "a", arg: 1, scope: !6, file: !7, line: 3, type: !10) + !12 = !DILocation(line: 3, column: 13, scope: !6) + !13 = !DILocation(line: 4, column: 11, scope: !6) + !14 = !DILocation(line: 4, column: 3, scope: !6) + !15 = !DILocalVariable(name: "__vla_expr0", scope: !6, type: !16, flags: DIFlagArtificial) + !16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) + !17 = !DILocation(line: 0, scope: !6) + !18 = !DILocalVariable(name: "vla", scope: !6, file: !7, line: 4, type: !19) + !19 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, elements: !20) + !20 = !{!21} + !21 = !DISubrange(count: !15) + !22 = !DILocation(line: 4, column: 7, scope: !6) + !23 = !DILocalVariable(name: "i", scope: !24, file: !7, line: 6, type: !10) + !24 = distinct !DILexicalBlock(scope: !6, file: !7, line: 6, column: 3) + !25 = !DILocation(line: 6, column: 12, scope: !24) + !26 = !DILocation(line: 6, column: 8, scope: !24) + !27 = !DILocation(line: 6, column: 19, scope: !28) + !28 = distinct !DILexicalBlock(scope: !24, file: !7, line: 6, column: 3) + !29 = !DILocation(line: 6, column: 23, scope: !28) + !30 = !DILocation(line: 6, column: 21, scope: !28) + !31 = !DILocation(line: 6, column: 3, scope: !24) + !32 = !DILocation(line: 7, column: 14, scope: !28) + !33 = !DILocation(line: 7, column: 16, scope: !28) + !34 = !DILocation(line: 7, column: 15, scope: !28) + !35 = !DILocation(line: 7, column: 9, scope: !28) + !36 = !DILocation(line: 7, column: 5, scope: !28) + !37 = !DILocation(line: 7, column: 12, scope: !28) + !38 = !DILocation(line: 6, column: 26, scope: !28) + !39 = !DILocation(line: 6, column: 3, scope: !28) + !40 = distinct !{!40, !31, !41} + !41 = !DILocation(line: 7, column: 16, scope: !24) + !42 = !DILocation(line: 10, column: 14, scope: !6) + !43 = !DILocation(line: 10, column: 15, scope: !6) + !44 = !DILocation(line: 10, column: 10, scope: !6) + !45 = !DILocation(line: 11, column: 1, scope: !6) + +... +--- +name: foo +tracksRegLiveness: true +frameInfo: + hasCalls: true +stack: + - { id: 0, name: a.addr, size: 4, alignment: 4, debug-info-variable: '!11', + debug-info-expression: '!DIExpression()', debug-info-location: '!12' } + - { id: 1, name: __vla_expr0, size: 8, alignment: 8, debug-info-variable: '!15', + debug-info-expression: '!DIExpression()', debug-info-location: '!17' } + - { id: 2, name: i, size: 4, alignment: 4, debug-info-variable: '!23', + debug-info-expression: '!DIExpression()', debug-info-location: '!25' } + - { id: 3, name: vla, type: variable-sized, alignment: 1 } +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $edi, $rbx + ; CHECK: frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp + ; CHECK: CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK: CFI_INSTRUCTION offset $rbp, -16 + ; CHECK: $rbp = frame-setup MOV64rr $rsp + ; CHECK: CFI_INSTRUCTION def_cfa_register $rbp + ; CHECK: frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp, debug-location !13 + ; CHECK: $rsp = frame-setup SUB64ri8 $rsp, 40, implicit-def dead $eflags + ; CHECK: CFI_INSTRUCTION offset $rbx, -24 + ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !13 :: (dereferenceable load 4 from %ir.a.addr) + ; CHECK: renamable $rax = KILL killed renamable $eax, debug-location !13 + ; CHECK: $rcx = MOV64rr $rsp, debug-location !14 + ; CHECK: MOV64mr $rbp, 1, $noreg, -40, $noreg, $rcx :: (store 8 into %stack.4) + ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 + ; CHECK: $rsp = MOV64rr $rcx, debug-location !14 + ; CHECK: MOV64mr $rbp, 1, $noreg, -24, $noreg, killed renamable $rax, debug-location !14 :: (store 8 into %ir.__vla_expr0) + ; CHECK: DBG_VALUE renamable $rcx, 0, !18, !DIExpression(), debug-location !22 + ; CHECK: MOV32mi $rbp, 1, $noreg, -28, $noreg, 0, debug-location !25 :: (store 4 into %ir.i) + ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 + ; CHECK: bb.1.for.cond: + ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 + ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -28, $noreg, debug-location !27 :: (load 4 from %ir.i) + ; CHECK: CMP32rm killed renamable $eax, $rbp, 1, $noreg, -12, $noreg, implicit-def $eflags, debug-location !30 :: (load 4 from %ir.a.addr) + ; CHECK: JCC_1 %bb.4, 13, implicit killed $eflags, debug-location !31 + ; CHECK: bb.2.for.body: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 + ; CHECK: $rax = MOV64rm $rbp, 1, $noreg, -40, $noreg :: (load 8 from %stack.4) + ; CHECK: renamable $edx = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !32 :: (load 4 from %ir.a.addr) + ; CHECK: renamable $rcx = MOVSX64rm32 $rbp, 1, $noreg, -28, $noreg, debug-location !36 :: (load 4 from %ir.i) + ; CHECK: MOV32mr renamable $rax, 4, killed renamable $rcx, 0, $noreg, killed renamable $edx, debug-location !37 :: (store 4 into %ir.arrayidx) + ; CHECK: bb.3.for.inc: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 + ; CHECK: JMP_1 %bb.1, debug-location !39 + ; CHECK: bb.4.for.end: + ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 + ; CHECK: $rax = IMPLICIT_DEF + ; CHECK: $rax = MOV64rm $rbp, 1, $noreg, -40, $noreg :: (load 8 from %stack.4) + ; CHECK: dead $rbx = IMPLICIT_DEF + ; CHECK: dead $rcx = IMPLICIT_DEF + ; CHECK: dead $rdx = IMPLICIT_DEF + ; CHECK: renamable $rcx = IMPLICIT_DEF + ; CHECK: renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, debug-location !44 :: (load 4 from %ir.arrayidx3) + ; CHECK: $rsp = LEA64r $rbp, 1, $noreg, -8, $noreg, debug-location !45 + ; CHECK: $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !45 + ; CHECK: $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !45 + ; CHECK: CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !45 + ; CHECK: RETQ implicit killed $eax, debug-location !45 + bb.0.entry: + liveins: $edi + + %0:gr32 = COPY $edi + %1:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg, debug-location !13 :: (dereferenceable load 4 from %ir.a.addr) + %2:gr64_nosp = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit, debug-location !13 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, debug-location !14 + %3:gr64 = COPY $rsp, debug-location !14 + $rsp = COPY %3, debug-location !14 + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, debug-location !14 + MOV64mr %stack.1.__vla_expr0, 1, $noreg, 0, $noreg, %2, debug-location !14 :: (store 8 into %ir.__vla_expr0) + DBG_VALUE %3, 0, !18, !DIExpression(), debug-location !22 + MOV32mi %stack.2.i, 1, $noreg, 0, $noreg, 0, debug-location !25 :: (store 4 into %ir.i) + + bb.1.for.cond: + %4:gr32 = MOV32rm %stack.2.i, 1, $noreg, 0, $noreg, debug-location !27 :: (load 4 from %ir.i) + CMP32rm %4, %stack.0.a.addr, 1, $noreg, 0, $noreg, implicit-def $eflags, debug-location !30 :: (load 4 from %ir.a.addr) + JCC_1 %bb.4, 13, implicit $eflags, debug-location !31 + + bb.2.for.body: + %5:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg, debug-location !32 :: (load 4 from %ir.a.addr) + %6:gr64_nosp = MOVSX64rm32 %stack.2.i, 1, $noreg, 0, $noreg, debug-location !36 :: (load 4 from %ir.i) + MOV32mr %3, 4, %6, 0, $noreg, killed %5, debug-location !37 :: (store 4 into %ir.arrayidx) + + bb.3.for.inc: + JMP_1 %bb.1, debug-location !39 + + bb.4.for.end: + $rax = IMPLICIT_DEF + $rbx = IMPLICIT_DEF + $rcx = IMPLICIT_DEF + $rdx = IMPLICIT_DEF + %7:gr64_nosp = IMPLICIT_DEF + %8:gr32 = MOV32rm %3, 4, %7, 0, $noreg, debug-location !44 :: (load 4 from %ir.arrayidx3) + $eax = COPY %8, debug-location !45 + RETQ implicit $eax, debug-location !45 + +... diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll index 8174cabe29327..bb6320f73f721 100644 --- a/llvm/test/DebugInfo/X86/fission-ranges.ll +++ b/llvm/test/DebugInfo/X86/fission-ranges.ll @@ -51,7 +51,7 @@ ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[E]]: ; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x0000000b): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x0000005a): DW_OP_breg7 RSP-36 +; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x0000005a): DW_OP_breg7 RSP-48 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[B]]: ; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000b): DW_OP_reg0 RAX diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll index e357d3c9b02e5..8fb6340a184ea 100644 --- a/llvm/test/DebugInfo/X86/op_deref.ll +++ b/llvm/test/DebugInfo/X86/op_deref.ll @@ -7,7 +7,8 @@ ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 ; DWARF4-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref -; DWARF4-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref) +; DWARF4-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref) ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 ; DWARF3-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref diff --git a/llvm/test/DebugInfo/X86/parameters.ll b/llvm/test/DebugInfo/X86/parameters.ll index 9b139eaffffc6..dafde9acceff2 100644 --- a/llvm/test/DebugInfo/X86/parameters.ll +++ b/llvm/test/DebugInfo/X86/parameters.ll @@ -38,7 +38,8 @@ ; CHECK: DW_TAG_formal_parameter ; CHECK: DW_AT_location{{.*}}( ; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref -; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref) +; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref +; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref) ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name{{.*}} = "g" diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll index f245cbaa627cb..59d98866e0912 100644 --- a/llvm/test/DebugInfo/X86/sret.ll +++ b/llvm/test/DebugInfo/X86/sret.ll @@ -3,8 +3,8 @@ ; Based on the debuginfo-tests/sret.cpp code. -; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e) -; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e) +; CHECK-DWO: DW_AT_GNU_dwo_id (0xa58a336e896549f1) +; CHECK-DWO: DW_AT_GNU_dwo_id (0xa58a336e896549f1) ; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,FASTISEL %s ; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,SDAG %s @@ -12,8 +12,10 @@ ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location (0x00000000 ; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref -; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) -; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0 +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref) +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0 +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref) ; CHECK-NEXT: DW_AT_name {{.*}}"a" %class.A = type { i32 (...)**, i32 } diff --git a/llvm/test/DebugInfo/X86/subreg.ll b/llvm/test/DebugInfo/X86/subreg.ll index 37f3181d87980..671af9e05fe24 100644 --- a/llvm/test/DebugInfo/X86/subreg.ll +++ b/llvm/test/DebugInfo/X86/subreg.ll @@ -11,9 +11,6 @@ define i16 @f(i16 signext %zzz) nounwind !dbg !1 { entry: call void @llvm.dbg.value(metadata i16 %zzz, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1) - br label %exit - -exit: %conv = sext i16 %zzz to i32, !dbg !7 %conv1 = trunc i32 %conv to i16 ret i16 %conv1 From d93459992e559e774e7b14208e5bd8bf27a58280 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 22 Sep 2020 11:46:41 -0400 Subject: [PATCH 164/544] LiveDebugValues: Fix typos and indentation --- llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 16921ef0d5fc0..8833021243a7a 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -22,7 +22,7 @@ /// and the VarLocBasedLDV class is an implementation that explicitly tracks /// locations, using the VarLoc class. /// -/// The cannonical "available expressions" problem doesn't have expression +/// The canonical "available expressions" problem doesn't have expression /// clobbering, instead when a variable is re-assigned, any expressions using /// that variable get invalidated. LiveDebugValues can map onto "available /// expressions" by having every register represented by a variable, which is @@ -826,7 +826,7 @@ VarLocBasedLDV::VarLocBasedLDV() { } VarLocBasedLDV::~VarLocBasedLDV() { } /// Erase a variable from the set of open ranges, and additionally erase any -/// fragments that may overlap it. If the VarLoc is a buckup location, erase +/// fragments that may overlap it. If the VarLoc is a backup location, erase /// the variable from the EntryValuesBackupVars set, indicating we should stop /// tracking its backup entry location. Otherwise, if the VarLoc is primary /// location, erase the variable from the Vars set. @@ -1837,8 +1837,8 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) { MachineBasicBlock &First_MBB = *(MF.begin()); for (auto &MI : First_MBB) { collectRegDefs(MI, DefinedRegs, TRI); - if (MI.isDebugValue()) - recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); + if (MI.isDebugValue()) + recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); } // Initialize per-block structures and scan for fragment overlaps. From 5aa1119537fe6569b54d0da4d9d649a6940decff Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 26 Sep 2020 10:14:14 -0400 Subject: [PATCH 165/544] GlobalISel: Assert if MoreElements uses a non-vector type --- llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index d14682ccc054b..30acac14bc5f3 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -148,7 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule, if (NewTy.getNumElements() <= OldElts) return false; } - } + } else if (Rule.getAction() == MoreElements) + return false; // Make sure the element type didn't change. return NewTy.getScalarType() == OldTy.getScalarType(); From 2ef73025afda6481625b74eb99cdbc2eb1cfef95 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 15:32:57 +0100 Subject: [PATCH 166/544] [InstCombine] Remove %tmp variable names from bswap-fold tests Appease update_test_checks script that was complaining about potential %TMP clashes --- .../test/Transforms/InstCombine/bswap-fold.ll | 252 +++++++++--------- 1 file changed, 126 insertions(+), 126 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index 8fdecb628b858..5e9c0923d6c5a 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -5,29 +5,29 @@ ; A & 255 define i32 @test4(i32 %a) nounwind { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP2:%.*]] = and i32 %a, 255 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[T2:%.*]] = and i32 [[A:%.*]], 255 +; CHECK-NEXT: ret i32 [[T2]] ; - %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a ) - %tmp4 = lshr i32 %tmp2, 24 - ret i32 %tmp4 + %t2 = tail call i32 @llvm.bswap.i32( i32 %a ) + %t4 = lshr i32 %t2, 24 + ret i32 %t4 } ; a >> 24 define i32 @test6(i32 %a) nounwind { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 %a, 24 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[A:%.*]], 24 +; CHECK-NEXT: ret i32 [[T2]] ; - %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a ) - %tmp4 = and i32 %tmp2, 255 - ret i32 %tmp4 + %t2 = tail call i32 @llvm.bswap.i32( i32 %a ) + %t4 = and i32 %t2, 255 + ret i32 %t4 } ; PR5284 define i16 @test7(i32 %A) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 %A, 16 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 16 ; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[D]] ; @@ -39,7 +39,7 @@ define i16 @test7(i32 %A) { define i16 @test8(i64 %A) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 %A, 48 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 48 ; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[D]] ; @@ -63,7 +63,7 @@ define i64 @foo() { ; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) define i16 @bs_and16i(i16 %a, i16 %b) #0 { ; CHECK-LABEL: @bs_and16i( -; CHECK-NEXT: [[TMP1:%.*]] = and i16 %a, 4391 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[A:%.*]], 4391 ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) ; CHECK-NEXT: ret i16 [[TMP2]] ; @@ -74,132 +74,132 @@ define i16 @bs_and16i(i16 %a, i16 %b) #0 { define i16 @bs_and16(i16 %a, i16 %b) #0 { ; CHECK-LABEL: @bs_and16( -; CHECK-NEXT: [[TMP1:%.*]] = and i16 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) ; CHECK-NEXT: ret i16 [[TMP2]] ; - %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a) - %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b) - %tmp3 = and i16 %tmp1, %tmp2 - ret i16 %tmp3 + %t1 = tail call i16 @llvm.bswap.i16(i16 %a) + %t2 = tail call i16 @llvm.bswap.i16(i16 %b) + %t3 = and i16 %t1, %t2 + ret i16 %t3 } define i16 @bs_or16(i16 %a, i16 %b) #0 { ; CHECK-LABEL: @bs_or16( -; CHECK-NEXT: [[TMP1:%.*]] = or i16 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) ; CHECK-NEXT: ret i16 [[TMP2]] ; - %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a) - %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b) - %tmp3 = or i16 %tmp1, %tmp2 - ret i16 %tmp3 + %t1 = tail call i16 @llvm.bswap.i16(i16 %a) + %t2 = tail call i16 @llvm.bswap.i16(i16 %b) + %t3 = or i16 %t1, %t2 + ret i16 %t3 } define i16 @bs_xor16(i16 %a, i16 %b) #0 { ; CHECK-LABEL: @bs_xor16( -; CHECK-NEXT: [[TMP1:%.*]] = xor i16 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) ; CHECK-NEXT: ret i16 [[TMP2]] ; - %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a) - %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b) - %tmp3 = xor i16 %tmp1, %tmp2 - ret i16 %tmp3 + %t1 = tail call i16 @llvm.bswap.i16(i16 %a) + %t2 = tail call i16 @llvm.bswap.i16(i16 %b) + %t3 = xor i16 %t1, %t2 + ret i16 %t3 } define i32 @bs_and32i(i32 %a, i32 %b) #0 { ; CHECK-LABEL: @bs_and32i( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -1585053440 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -1585053440 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; - %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a) - %tmp2 = and i32 %tmp1, 100001 - ret i32 %tmp2 + %t1 = tail call i32 @llvm.bswap.i32(i32 %a) + %t2 = and i32 %t1, 100001 + ret i32 %t2 } define i32 @bs_and32(i32 %a, i32 %b) #0 { ; CHECK-LABEL: @bs_and32( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; - %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a) - %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b) - %tmp3 = and i32 %tmp1, %tmp2 - ret i32 %tmp3 + %t1 = tail call i32 @llvm.bswap.i32(i32 %a) + %t2 = tail call i32 @llvm.bswap.i32(i32 %b) + %t3 = and i32 %t1, %t2 + ret i32 %t3 } define i32 @bs_or32(i32 %a, i32 %b) #0 { ; CHECK-LABEL: @bs_or32( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; - %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a) - %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b) - %tmp3 = or i32 %tmp1, %tmp2 - ret i32 %tmp3 + %t1 = tail call i32 @llvm.bswap.i32(i32 %a) + %t2 = tail call i32 @llvm.bswap.i32(i32 %b) + %t3 = or i32 %t1, %t2 + ret i32 %t3 } define i32 @bs_xor32(i32 %a, i32 %b) #0 { ; CHECK-LABEL: @bs_xor32( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; - %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a) - %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b) - %tmp3 = xor i32 %tmp1, %tmp2 - ret i32 %tmp3 + %t1 = tail call i32 @llvm.bswap.i32(i32 %a) + %t2 = tail call i32 @llvm.bswap.i32(i32 %b) + %t3 = xor i32 %t1, %t2 + ret i32 %t3 } define i64 @bs_and64i(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64i( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 %a, 129085117527228416 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], 129085117527228416 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) ; CHECK-NEXT: ret i64 [[TMP2]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = and i64 %tmp1, 1000000001 - ret i64 %tmp2 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = and i64 %t1, 1000000001 + ret i64 %t2 } define i64 @bs_and64(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64( -; CHECK-NEXT: [[TMP1:%.*]] = and i64 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) ; CHECK-NEXT: ret i64 [[TMP2]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) - %tmp3 = and i64 %tmp1, %tmp2 - ret i64 %tmp3 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = tail call i64 @llvm.bswap.i64(i64 %b) + %t3 = and i64 %t1, %t2 + ret i64 %t3 } define i64 @bs_or64(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_or64( -; CHECK-NEXT: [[TMP1:%.*]] = or i64 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) ; CHECK-NEXT: ret i64 [[TMP2]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) - %tmp3 = or i64 %tmp1, %tmp2 - ret i64 %tmp3 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = tail call i64 @llvm.bswap.i64(i64 %b) + %t3 = or i64 %t1, %t2 + ret i64 %t3 } define i64 @bs_xor64(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_xor64( -; CHECK-NEXT: [[TMP1:%.*]] = xor i64 %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) ; CHECK-NEXT: ret i64 [[TMP2]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) - %tmp3 = xor i64 %tmp1, %tmp2 - ret i64 %tmp3 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = tail call i64 @llvm.bswap.i64(i64 %b) + %t3 = xor i64 %t1, %t2 + ret i64 %t3 } define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 { @@ -208,10 +208,10 @@ define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; - %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) - %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) - %tmp3 = and <2 x i32> %tmp1, %tmp2 - ret <2 x i32> %tmp3 + %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %t2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) + %t3 = and <2 x i32> %t1, %t2 + ret <2 x i32> %t3 } define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 { @@ -220,10 +220,10 @@ define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; - %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) - %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) - %tmp3 = or <2 x i32> %tmp1, %tmp2 - ret <2 x i32> %tmp3 + %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %t2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) + %t3 = or <2 x i32> %t1, %t2 + ret <2 x i32> %t3 } define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 { @@ -232,10 +232,10 @@ define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; - %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) - %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) - %tmp3 = xor <2 x i32> %tmp1, %tmp2 - ret <2 x i32> %tmp3 + %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %t2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b) + %t3 = xor <2 x i32> %t1, %t2 + ret <2 x i32> %t3 } define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { @@ -244,9 +244,9 @@ define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; - %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) - %tmp2 = and <2 x i32> %tmp1, - ret <2 x i32> %tmp2 + %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %t2 = and <2 x i32> %t1, + ret <2 x i32> %t2 } define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { @@ -255,9 +255,9 @@ define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; - %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) - %tmp2 = or <2 x i32> %tmp1, - ret <2 x i32> %tmp2 + %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %t2 = or <2 x i32> %t1, + ret <2 x i32> %t2 } define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 { @@ -266,69 +266,69 @@ define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; - %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) - %tmp2 = xor <2 x i32> %tmp1, - ret <2 x i32> %tmp2 + %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) + %t2 = xor <2 x i32> %t1, + ret <2 x i32> %t2 } define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64_multiuse1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]] -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) +; CHECK-NEXT: [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[T2]] +; CHECK-NEXT: [[T4:%.*]] = mul i64 [[T3]], [[T1]] +; CHECK-NEXT: [[T5:%.*]] = mul i64 [[T4]], [[T2]] +; CHECK-NEXT: ret i64 [[T5]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) - %tmp3 = and i64 %tmp1, %tmp2 - %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps - %tmp5 = mul i64 %tmp4, %tmp2 ; to increase use count of the bswaps - ret i64 %tmp5 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = tail call i64 @llvm.bswap.i64(i64 %b) + %t3 = and i64 %t1, %t2 + %t4 = mul i64 %t3, %t1 ; to increase use count of the bswaps + %t5 = mul i64 %t4, %t2 ; to increase use count of the bswaps + ret i64 %t5 } define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64_multiuse2( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[T4:%.*]] = mul i64 [[TMP2]], [[T1]] +; CHECK-NEXT: ret i64 [[T4]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) - %tmp3 = and i64 %tmp1, %tmp2 - %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps - ret i64 %tmp4 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = tail call i64 @llvm.bswap.i64(i64 %b) + %t3 = and i64 %t1, %t2 + %t4 = mul i64 %t3, %t1 ; to increase use count of the bswaps + ret i64 %t4 } define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64_multiuse3( -; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]]) ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP2]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[T4:%.*]] = mul i64 [[TMP2]], [[T2]] +; CHECK-NEXT: ret i64 [[T4]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b) - %tmp3 = and i64 %tmp1, %tmp2 - %tmp4 = mul i64 %tmp3, %tmp2 ; to increase use count of the bswaps - ret i64 %tmp4 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = tail call i64 @llvm.bswap.i64(i64 %b) + %t3 = and i64 %t1, %t2 + %t4 = mul i64 %t3, %t2 ; to increase use count of the bswaps + ret i64 %t4 } define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 { ; CHECK-LABEL: @bs_and64i_multiuse( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1000000001 -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP1]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]]) +; CHECK-NEXT: [[T2:%.*]] = and i64 [[T1]], 1000000001 +; CHECK-NEXT: [[T3:%.*]] = mul i64 [[T2]], [[T1]] +; CHECK-NEXT: ret i64 [[T3]] ; - %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a) - %tmp2 = and i64 %tmp1, 1000000001 - %tmp3 = mul i64 %tmp2, %tmp1 ; to increase use count of the bswap - ret i64 %tmp3 + %t1 = tail call i64 @llvm.bswap.i64(i64 %a) + %t2 = and i64 %t1, 1000000001 + %t3 = mul i64 %t2, %t1 ; to increase use count of the bswap + ret i64 %t3 } declare i16 @llvm.bswap.i16(i16) From 3a7487f903e2a6be29de39058eee2372e30798d5 Mon Sep 17 00:00:00 2001 From: Xiangling Liao Date: Wed, 30 Sep 2020 10:35:00 -0400 Subject: [PATCH 167/544] [FE] Use preferred alignment instead of ABI alignment for complete object when applicable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some targets, preferred alignment is larger than ABI alignment in some cases. For example, on AIX we have special power alignment rules which would cause that. Previously, to support those cases, we added a “PreferredAlignment” field in the `RecordLayout` to store the AIX special alignment values in “PreferredAlignment” as the community suggested. However, that patch alone is not enough. There are places in the Clang where `PreferredAlignment` should have been used instead of ABI-specified alignment. This patch is aimed at fixing those spots. Differential Revision: https://reviews.llvm.org/D86790 --- clang/include/clang/AST/ASTContext.h | 22 ++++++++++--- clang/lib/AST/ASTContext.cpp | 11 ++++--- clang/lib/CodeGen/CGExprCXX.cpp | 7 +++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 4 +-- clang/lib/CodeGen/TargetInfo.cpp | 4 --- clang/test/CodeGen/aix-alignment.c | 41 +++++++++++++++++++++++++ clang/test/CodeGenCXX/aix-alignment.cpp | 40 ++++++++++++++++++++++++ 7 files changed, 112 insertions(+), 17 deletions(-) create mode 100644 clang/test/CodeGen/aix-alignment.c create mode 100644 clang/test/CodeGenCXX/aix-alignment.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index de0d1198b6d40..d30cf045f1040 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -2134,16 +2134,25 @@ class ASTContext : public RefCountedBase { } unsigned getTypeUnadjustedAlign(const Type *T) const; - /// Return the ABI-specified alignment of a type, in bits, or 0 if + /// Return the alignment of a type, in bits, or 0 if /// the type is incomplete and we cannot determine the alignment (for - /// example, from alignment attributes). - unsigned getTypeAlignIfKnown(QualType T) const; + /// example, from alignment attributes). The returned alignment is the + /// Preferred alignment if NeedsPreferredAlignment is true, otherwise is the + /// ABI alignment. + unsigned getTypeAlignIfKnown(QualType T, + bool NeedsPreferredAlignment = false) const; /// Return the ABI-specified alignment of a (complete) type \p T, in /// characters. CharUnits getTypeAlignInChars(QualType T) const; CharUnits getTypeAlignInChars(const Type *T) const; + /// Return the PreferredAlignment of a (complete) type \p T, in + /// characters. + CharUnits getPreferredTypeAlignInChars(QualType T) const { + return toCharUnitsFromBits(getPreferredTypeAlign(T)); + } + /// getTypeUnadjustedAlignInChars - Return the ABI-specified alignment of a type, /// in characters, before alignment adjustments. This method does not work on /// incomplete types. @@ -2166,7 +2175,12 @@ class ASTContext : public RefCountedBase { /// the current target, in bits. /// /// This can be different than the ABI alignment in cases where it is - /// beneficial for performance to overalign a data type. + /// beneficial for performance or backwards compatibility preserving to + /// overalign a data type. (Note: despite the name, the preferred alignment + /// is ABI-impacting, and not an optimization.) + unsigned getPreferredTypeAlign(QualType T) const { + return getPreferredTypeAlign(T.getTypePtr()); + } unsigned getPreferredTypeAlign(const Type *T) const; /// Return the default alignment for __attribute__((aligned)) on diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index fc7abeaae9b17..376a0b044010a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1836,7 +1836,8 @@ bool ASTContext::isAlignmentRequired(QualType T) const { return isAlignmentRequired(T.getTypePtr()); } -unsigned ASTContext::getTypeAlignIfKnown(QualType T) const { +unsigned ASTContext::getTypeAlignIfKnown(QualType T, + bool NeedsPreferredAlignment) const { // An alignment on a typedef overrides anything else. if (const auto *TT = T->getAs()) if (unsigned Align = TT->getDecl()->getMaxAlignment()) @@ -1845,7 +1846,7 @@ unsigned ASTContext::getTypeAlignIfKnown(QualType T) const { // If we have an (array of) complete type, we're done. T = getBaseElementType(T); if (!T->isIncompleteType()) - return getTypeAlign(T); + return NeedsPreferredAlignment ? getPreferredTypeAlign(T) : getTypeAlign(T); // If we had an array type, its element type might be a typedef // type with an alignment attribute. @@ -2402,7 +2403,8 @@ CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const { /// getPreferredTypeAlign - Return the "preferred" alignment of the specified /// type for the current target in bits. This can be different than the ABI /// alignment in cases where it is beneficial for performance or backwards -/// compatibility preserving to overalign a data type. +/// compatibility preserving to overalign a data type. (Note: despite the name, +/// the preferred alignment is ABI-impacting, and not an optimization.) unsigned ASTContext::getPreferredTypeAlign(const Type *T) const { TypeInfo TI = getTypeInfo(T); unsigned ABIAlign = TI.Align; @@ -2458,7 +2460,8 @@ unsigned ASTContext::getTargetDefaultAlignForAttributeAligned() const { /// to a global variable of the specified type. unsigned ASTContext::getAlignOfGlobalVar(QualType T) const { uint64_t TypeSize = getTypeSize(T.getTypePtr()); - return std::max(getTypeAlign(T), getTargetInfo().getMinGlobalAlign(TypeSize)); + return std::max(getPreferredTypeAlign(T), + getTargetInfo().getMinGlobalAlign(TypeSize)); } /// getAlignOfGlobalVarInChars - Return the alignment in characters that diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index e33730b9ae901..c8b059fd7db08 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1570,7 +1570,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { llvm::Value *allocSize = EmitCXXNewAllocSize(*this, E, minElements, numElements, allocSizeWithoutCookie); - CharUnits allocAlign = getContext().getTypeAlignInChars(allocType); + CharUnits allocAlign = getContext().getPreferredTypeAlignInChars(allocType); // Emit the allocation call. If the allocator is a global placement // operator, just "inline" it directly. @@ -1820,8 +1820,9 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, // Pass the alignment if the delete function has an align_val_t parameter. if (Params.Alignment) { QualType AlignValType = *ParamTypeIt++; - CharUnits DeleteTypeAlign = getContext().toCharUnitsFromBits( - getContext().getTypeAlignIfKnown(DeleteTy)); + CharUnits DeleteTypeAlign = + getContext().toCharUnitsFromBits(getContext().getTypeAlignIfKnown( + DeleteTy, true /* NeedsPreferredAlignment */)); llvm::Value *Align = llvm::ConstantInt::get(ConvertType(AlignValType), DeleteTypeAlign.getQuantity()); DeleteArgs.add(RValue::get(Align), AlignValType); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 69825a036a1e4..cfb736ce0ff1c 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2111,7 +2111,7 @@ CharUnits ItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) { // The array cookie is a size_t; pad that up to the element alignment. // The cookie is actually right-justified in that space. return std::max(CharUnits::fromQuantity(CGM.SizeSizeInBytes), - CGM.getContext().getTypeAlignInChars(elementType)); + CGM.getContext().getPreferredTypeAlignInChars(elementType)); } Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, @@ -2128,7 +2128,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, // The size of the cookie. CharUnits CookieSize = - std::max(SizeSize, Ctx.getTypeAlignInChars(ElementType)); + std::max(SizeSize, Ctx.getPreferredTypeAlignInChars(ElementType)); assert(CookieSize == getArrayCookieSizeImpl(ElementType)); // Compute an offset to the cookie. diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 5c052b7fb84b4..f39ded3dc31c0 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -4512,8 +4512,6 @@ ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); - // TODO: Evaluate if AIX power alignment rule would have an impact on the - // alignment here. if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); @@ -4530,8 +4528,6 @@ ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const { if (Ty->isVectorType()) llvm::report_fatal_error("vector type is not supported on AIX yet"); - // TODO: Evaluate if AIX power alignment rule would have an impact on the - // alignment here. if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. diff --git a/clang/test/CodeGen/aix-alignment.c b/clang/test/CodeGen/aix-alignment.c new file mode 100644 index 0000000000000..fdb0bad197bb7 --- /dev/null +++ b/clang/test/CodeGen/aix-alignment.c @@ -0,0 +1,41 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc-unknown-aix -emit-llvm -o - %s | \ +// RUN: FileCheck %s --check-prefixes=AIX,AIX32 +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -emit-llvm -o - %s | \ +// RUN: FileCheck %s --check-prefixes=AIX,AIX64 + +// AIX: @d = global double 0.000000e+00, align 8 +double d; + +typedef struct { + double d; + int i; +} StructDouble; + +// AIX: @d1 = global %struct.StructDouble zeroinitializer, align 8 +StructDouble d1; + +// AIX: double @retDouble(double %x) +// AIX: %x.addr = alloca double, align 8 +// AIX: store double %x, double* %x.addr, align 8 +// AIX: load double, double* %x.addr, align 8 +// AIX: ret double %0 +double retDouble(double x) { return x; } + +// AIX32: define void @bar(%struct.StructDouble* noalias sret align 4 %agg.result, %struct.StructDouble* byval(%struct.StructDouble) align 4 %x) +// AIX64: define void @bar(%struct.StructDouble* noalias sret align 4 %agg.result, %struct.StructDouble* byval(%struct.StructDouble) align 8 %x) +// AIX: %0 = bitcast %struct.StructDouble* %agg.result to i8* +// AIX: %1 = bitcast %struct.StructDouble* %x to i8* +// AIX32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) +// AIX64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 8 %1, i64 16, i1 false) +StructDouble bar(StructDouble x) { return x; } + +// AIX: define void @foo(double* %out, double* %in) +// AIX32: %0 = load double*, double** %in.addr, align 4 +// AIX64: %0 = load double*, double** %in.addr, align 8 +// AIX: %1 = load double, double* %0, align 4 +// AIX: %mul = fmul double %1, 2.000000e+00 +// AIX32: %2 = load double*, double** %out.addr, align 4 +// AIX64: %2 = load double*, double** %out.addr, align 8 +// AIX: store double %mul, double* %2, align 4 +void foo(double *out, double *in) { *out = *in * 2; } diff --git a/clang/test/CodeGenCXX/aix-alignment.cpp b/clang/test/CodeGenCXX/aix-alignment.cpp new file mode 100644 index 0000000000000..4c8330b42e929 --- /dev/null +++ b/clang/test/CodeGenCXX/aix-alignment.cpp @@ -0,0 +1,40 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm -o - -x c++ %s | \ +// RUN: FileCheck %s --check-prefixes=AIX,AIX32 +// RUN: %clang_cc1 -triple powerpc64-unknown-aix \ +// RUN: -emit-llvm -o - %s -x c++| \ +// RUN: FileCheck %s --check-prefixes=AIX,AIX64 + +struct B { + double d; + ~B() {} +}; + +// AIX32: %call = call noalias nonnull i8* @_Znam(i32 8) +// AIX64: %call = call noalias nonnull i8* @_Znam(i64 8) +B *allocBp() { return new B[0]; } + +// AIX-LABEL: delete.notnull: +// AIX32: %0 = bitcast %struct.B* %call to i8* +// AIX32: %1 = getelementptr inbounds i8, i8* %0, i32 -8 +// AIX32: %2 = getelementptr inbounds i8, i8* %1, i32 4 +// AIX32: %3 = bitcast i8* %2 to i32* +// AIX64: %0 = bitcast %struct.B* %call to i8* +// AIX64: %1 = getelementptr inbounds i8, i8* %0, i64 -8 +// AIX64: %2 = bitcast i8* %1 to i64* +void bar() { delete[] allocBp(); } + +typedef struct D { + double d; + int i; + + ~D(){}; +} D; + +// AIX: define void @_Z3foo1D(%struct.D* noalias sret align 4 %agg.result, %struct.D* %x) +// AIX: %1 = bitcast %struct.D* %agg.result to i8* +// AIX: %2 = bitcast %struct.D* %x to i8* +// AIX32 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %2, i32 16, i1 false) +// AIX64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 %2, i64 16, i1 false) +D foo(D x) { return x; } From 892fdc923f06adbef507ebe594fa7b48224d93f0 Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Tue, 29 Sep 2020 16:14:49 -0700 Subject: [PATCH 168/544] [mlir][Linalg] Generalize the logic to compute reassociation maps while folding tensor_reshape op. While folding reshapes that introduce unit extent dims, the logic to compute the reassociation maps can be generalized to handle some corner cases, for example, when the folded shape still has unit-extent dims but corresponds to folded unit extent dims of the expanded shape. Differential Revision: https://reviews.llvm.org/D88521 --- .../Linalg/Transforms/DropUnitDims.cpp | 87 +++++++++---------- .../Dialect/Linalg/drop-unit-extent-dims.mlir | 16 ++++ 2 files changed, 58 insertions(+), 45 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp index 08e7e352d63e9..611c938ab542f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -403,61 +403,58 @@ struct FoldReshapeOpWithUnitExtent : OpRewritePattern { srcType.getRank() < dstType.getRank() || parentSrcType.getRank() == dstType.getRank()) return failure(); + // Check if the result tensor_reshape after folding the reshapeOp and // parentReshapeOp are combined. // If the final tensor_reshape is folding, the parentReshapeOp is // introducing unit-dims, and the reshapeOp does an actual reshape. - // If the final tensor_reshape op is expanding, the reshapeOp is introducing - // unit-dims, and the parentReshapeOp does an actual reshape. + // If the final tensor_reshape op is expanding, the reshapeOp is + // introducing unit-dims, and the parentReshapeOp does an actual reshape. bool isFoldingPattern = parentSrcType.getRank() > dstType.getRank(); - auto reassociationMaps = isFoldingPattern - ? reshapeOp.getReassociationMaps() - : parentReshapeOp.getReassociationMaps(); - DenseSet conservedDimensions; - for (auto &map : reassociationMaps) { - if (map.getNumResults() == 1) { - conservedDimensions.insert( - map.getResult(0).cast().getPosition()); - } - } - - // Find positions at which the unit-dims exist. - int64_t nonUnitDimPos = 0; - DenseMap nonUnitSrcDims; - ArrayRef nonUnitShape = + ArrayRef expandedShape = isFoldingPattern ? parentSrcType.getShape() : dstType.getShape(); - for (auto shape : enumerate(srcType.getShape())) { - // Case 1 : It is a conserved dimension. - if (conservedDimensions.count(shape.index())) { - nonUnitSrcDims[shape.index()] = nonUnitDimPos++; - continue; + ArrayRef foldedShape = + isFoldingPattern ? dstType.getShape() : parentSrcType.getShape(); + + unsigned expandedDim = 0, foldedDim = 0; + SmallVector, 4> reassociationExprs( + foldedShape.size()); + while (expandedDim < expandedShape.size() && + foldedDim < foldedShape.size()) { + int64_t dstSize = foldedShape[foldedDim]; + int64_t srcSize = expandedShape[expandedDim]; + while (srcSize < dstSize && expandedDim < expandedShape.size()) { + reassociationExprs[foldedDim].push_back( + rewriter.getAffineDimExpr(expandedDim++)); + srcSize *= expandedShape[expandedDim]; } - // Case 2 : Dimensions dont match but the intermediate tensor is unit-dim. - if (shape.value() == 1) - continue; - // Case 3 : Dimensions match, treat it as a non-unit src dim. - if (nonUnitDimPos < static_cast(nonUnitShape.size()) && - nonUnitShape[nonUnitDimPos] == shape.value()) { - nonUnitSrcDims[shape.index()] = nonUnitDimPos++; - continue; + if (srcSize == dstSize) { + reassociationExprs[foldedDim].push_back( + rewriter.getAffineDimExpr(expandedDim++)); + // If the next dim in foldedShape is not 1, treat subsequent dims in + // expandedShape which are 1 to be collapsed. + if (foldedDim == foldedShape.size() - 1 || + foldedShape[foldedDim + 1] != 1) { + while (expandedDim < expandedShape.size() && + expandedShape[expandedDim] == 1) { + reassociationExprs[foldedDim].push_back( + rewriter.getAffineDimExpr(expandedDim++)); + } + } + } else { + return failure(); } - return failure(); + foldedDim++; } + if (expandedDim != expandedShape.size()) + return failure(); - // Compute reassociation maps for the final operation. Use the reassociation - // maps that is actually doing a reshape (and not just introducing - // unit-dims). From these maps, prune the unit-extent dimensions. - for (AffineMap &map : reassociationMaps) { - SmallVector exprs; - exprs.reserve(nonUnitSrcDims.size()); - for (auto result : map.getResults()) { - unsigned dim = result.cast().getPosition(); - if (nonUnitSrcDims.count(dim)) - exprs.push_back(rewriter.getAffineDimExpr(nonUnitSrcDims[dim])); - } - map = AffineMap::get(nonUnitSrcDims.size(), 0, exprs, - rewriter.getContext()); - } + SmallVector reassociationMaps = + llvm::to_vector<4>(llvm::map_range( + reassociationExprs, [&](ArrayRef exprs) -> AffineMap { + return AffineMap::get(expandedShape.size(), 0, exprs, + rewriter.getContext()); + })); rewriter.replaceOpWithNewOp( reshapeOp, dstType, parentReshapeOp.src(), rewriter.getAffineMapArrayAttr(reassociationMaps)); diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index 06e56c5cb7d2a..1793d2b59b706 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -240,3 +240,19 @@ func @fold_reshape(%arg0 : tensor<2048x1x2048xf32>) -> tensor<4x512x1x512x4xf32> : tensor<1x4x1x512x1x1x512x1x4xf32> into tensor<4x512x1x512x4xf32> return %1 : tensor<4x512x1x512x4xf32> } + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK: func @fold_reshape +// CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]] +// CHECK-SAME: tensor<2xf32> into tensor<2x1xf32> +func @fold_reshape(%arg0: tensor<2xf32>) -> tensor<2x1xf32> +{ + %0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1, d2) -> (d0, d1, d2)>] : tensor<2xf32> into tensor<2x1x1xf32> + %1 = linalg.tensor_reshape %0 + [affine_map<(d0, d1, d2) -> (d0)>, + affine_map<(d0, d1, d2) -> (d1, d2)> + ] : tensor<2x1x1xf32> into tensor<2x1xf32> + return %1 : tensor<2x1xf32> +} From b85de2c69cf3d6fbc2ad3439a6224667a58f704c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 15:42:53 +0100 Subject: [PATCH 169/544] [InstCombine] Add bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) vector tests Add tests showing failure to correctly fold vector bswap(trunc(bswap(x))) intrinsic patterns --- .../test/Transforms/InstCombine/bswap-fold.ll | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index 5e9c0923d6c5a..c90b880f0ea77 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -37,6 +37,16 @@ define i16 @test7(i32 %A) { ret i16 %D } +define <2 x i16> @test7_vector(<2 x i32> %A) { +; CHECK-LABEL: @test7_vector( +; CHECK-NEXT: ret <2 x i16> undef +; + %B = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %A) nounwind + %C = trunc <2 x i32> %B to <2 x i16> + %D = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %C) nounwind + ret <2 x i16> %D +} + define i16 @test8(i64 %A) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A:%.*]], 48 @@ -49,6 +59,16 @@ define i16 @test8(i64 %A) { ret i16 %D } +define <2 x i16> @test8_vector(<2 x i64> %A) { +; CHECK-LABEL: @test8_vector( +; CHECK-NEXT: ret <2 x i16> undef +; + %B = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %A) nounwind + %C = trunc <2 x i64> %B to <2 x i16> + %D = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %C) nounwind + ret <2 x i16> %D +} + ; Misc: Fold bswap(undef) to undef. define i64 @foo() { ; CHECK-LABEL: @foo( @@ -334,4 +354,6 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 { declare i16 @llvm.bswap.i16(i16) declare i32 @llvm.bswap.i32(i32) declare i64 @llvm.bswap.i64(i64) +declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) From 323d08e50a7bb80786dc00a8ade6ae49e1358393 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 15:51:54 +0100 Subject: [PATCH 170/544] [InstCombine] Fix bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) vector support Use getScalarSizeInBits not getPrimitiveSizeInBits to determine the shift value at the element level. --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4 ++-- llvm/test/Transforms/InstCombine/bswap-fold.ll | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 465191b4ae1f7..c069657809295 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -828,8 +828,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { - unsigned C = X->getType()->getPrimitiveSizeInBits() - - IIOperand->getType()->getPrimitiveSizeInBits(); + unsigned C = X->getType()->getScalarSizeInBits() - + IIOperand->getType()->getScalarSizeInBits(); Value *CV = ConstantInt::get(X->getType(), C); Value *V = Builder.CreateLShr(X, CV); return new TruncInst(V, IIOperand->getType()); diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index c90b880f0ea77..da7380e0ab74e 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -39,7 +39,9 @@ define i16 @test7(i32 %A) { define <2 x i16> @test7_vector(<2 x i32> %A) { ; CHECK-LABEL: @test7_vector( -; CHECK-NEXT: ret <2 x i16> undef +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> +; CHECK-NEXT: ret <2 x i16> [[D]] ; %B = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %A) nounwind %C = trunc <2 x i32> %B to <2 x i16> @@ -61,7 +63,9 @@ define i16 @test8(i64 %A) { define <2 x i16> @test8_vector(<2 x i64> %A) { ; CHECK-LABEL: @test8_vector( -; CHECK-NEXT: ret <2 x i16> undef +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i16> +; CHECK-NEXT: ret <2 x i16> [[D]] ; %B = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %A) nounwind %C = trunc <2 x i64> %B to <2 x i16> From 2c394bd4071d32000e2eed0f7d90fe7c576d7050 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 30 Sep 2020 17:01:14 +0200 Subject: [PATCH 171/544] [PowerPC] Avoid unused variable warning in Release builds PPCFrameLowering.cpp:632:8: warning: unused variable 'isAIXABI' [-Wunused-variable] --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 340a4f867ced1..6f1fe4e113bda 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -39,15 +39,6 @@ EnablePEVectorSpills("ppc-enable-pe-vector-spills", cl::desc("Enable spills in prologue to vector registers."), cl::init(false), cl::Hidden); -/// VRRegNo - Map from a numbered VR register to its enum value. -/// -static const MCPhysReg VRRegNo[] = { - PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , - PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; - static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { if (STI.isAIXABI()) return STI.isPPC64() ? 16 : 8; @@ -629,9 +620,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool isAIXABI = Subtarget.isAIXABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); + assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); // Work out frame sizes. unsigned FrameSize = determineFrameLayoutAndUpdate(MF); From 052c5bf40a9fc9ffe1bb2669763d8a0d2dea2b2e Mon Sep 17 00:00:00 2001 From: Zarko Todorovski Date: Wed, 30 Sep 2020 11:03:03 -0400 Subject: [PATCH 172/544] [PPC] Do not emit extswsli in 32BIT mode when using -mcpu=pwr9 It looks like in some circumstances when compiling with `-mcpu=pwr9` we create an EXTSWSLI node when which causes llc to fail. No such error occurs in pwr8 or lower. This occurs in 32BIT AIX and BE Linux. the cause seems to be that the default return in combineSHL is to create an EXTSWSLI node. Adding a check for whether we are in PPC64 before that fixes the issue. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D87046 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++--- llvm/test/CodeGen/PowerPC/ppc-32bit-shift.ll | 28 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/ppc-32bit-shift.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 75b5ec9ec13ae..0efb03589ef60 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16322,10 +16322,10 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { SDValue N0 = N->getOperand(0); ConstantSDNode *CN1 = dyn_cast(N->getOperand(1)); - if (!Subtarget.isISA3_0() || + if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() || N0.getOpcode() != ISD::SIGN_EXTEND || - N0.getOperand(0).getValueType() != MVT::i32 || - CN1 == nullptr || N->getValueType(0) != MVT::i64) + N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr || + N->getValueType(0) != MVT::i64) return SDValue(); // We can't save an operation here if the value is already extended, and diff --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-shift.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-shift.ll new file mode 100644 index 0000000000000..8c6df8c5edfb1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-shift.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=32BIT + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64 \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=64BIT + +define dso_local void @foo(i32 %inta, i64* %long_intb) { + entry: + %conv = sext i32 %inta to i64 + %shl = shl nsw i64 %conv, 8 + store i64 %shl, i64* %long_intb, align 8 + ret void +} + +; CHECK-LABEL: foo: + +; 32BIT-DAG: srawi [[REG1:[0-9]+]], [[REG2:[0-9]+]], 31 +; 32BIT-DAG: rotlwi [[REG3:[0-9]+]], [[REG2]], 8 +; 32BIT-DAG: slwi [[REG4:[0-9]+]], [[REG2]], 8 +; 32BIT-DAG: rlwimi [[REG5:[0-9]+]], [[REG1]], 8, 0, 23 +; 32BIT-DAG: stw [[REG4]], 4([[REG6:[0-9]+]]) +; 32BIT-DAG: stw [[REG5]], 0([[REG6]]) +; 32BIT: blr + +; 64BIT: extswsli [[REG1:[0-9]+]], [[REG2:[0-9]+]], 8 +; 64BIT-NEXT: std [[REG1]], 0([[REG3:[0-9]+]]) +; 64BIT-NEXT: blr From f425418fc4ebd989c6c3d59d20e7fe37cb29259c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Sep 2020 16:08:52 +0100 Subject: [PATCH 173/544] [InstCombine] Add tests for 'partial' bswap patterns As mentioned on PR47191, if we're bswap'ing some bytes and the zero'ing the remainder we can perform this as a bswap+mask which helps us match 'partial' bswaps as a first step towards folding into a more complex bswap pattern. --- llvm/test/Transforms/InstCombine/bswap.ll | 47 +++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 965f149b6d95e..8adcb748b96f0 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -345,6 +345,53 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { ret i8 %7 } +define i64 @bswap_and_mask_0(i64 %0) { +; CHECK-LABEL: @bswap_and_mask_0( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: ret i64 [[TMP4]] +; + %2 = lshr i64 %0, 56 + %3 = shl i64 %0, 56 + %4 = or i64 %2, %3 + ret i64 %4 +} + +define i64 @bswap_and_mask_1(i64 %0) { +; CHECK-LABEL: @bswap_and_mask_1( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP4]], [[TMP2]] +; CHECK-NEXT: ret i64 [[TMP5]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = or i64 %4, %2 + ret i64 %5 +} + +define i64 @bswap_and_mask_2(i64 %0) { +; CHECK-LABEL: @bswap_and_mask_2( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] +; CHECK-NEXT: ret i64 [[TMP7]] +; + %2 = lshr i64 %0, 56 + %3 = shl i64 %0, 56 + %4 = or i64 %2, %3 + %5 = shl i64 %0, 40 + %6 = and i64 %5, 71776119061217280 + %7 = or i64 %4, %6 + ret i64 %7 +} + define i32 @shuf_4bytes(<4 x i8> %x) { ; CHECK-LABEL: @shuf_4bytes( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[X:%.*]] to i32 From d6de40f8865e2c016731f9b63d8a0a218ce1b74f Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 29 Sep 2020 09:09:25 -0700 Subject: [PATCH 174/544] [NFC][regalloc] Make VirtRegAuxInfo part of allocator state All the state of VRAI is allocator-wide, so we can avoid creating it every time we need it. In addition, the normalization function is allocator-specific. In a next change, we can simplify that design in favor of just having it as a virtual member. Differential Revision: https://reviews.llvm.org/D88499 --- llvm/include/llvm/CodeGen/CalcSpillWeights.h | 14 ++++---------- llvm/lib/CodeGen/CalcSpillWeights.cpp | 14 ++++---------- llvm/lib/CodeGen/RegAllocBasic.cpp | 7 +++---- llvm/lib/CodeGen/RegAllocGreedy.cpp | 15 ++++++++------- llvm/lib/CodeGen/RegAllocPBQP.cpp | 5 +++-- 5 files changed, 22 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index 9b8b7324f30a3..d2e79170384d0 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -91,17 +91,11 @@ class VirtRegMap; /// \return The spill weight. Returns negative weight for unspillable li. float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, SlotIndex *end = nullptr); - }; - - /// Compute spill weights and allocation hints for all virtual register - /// live intervals. - void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, - VirtRegMap *VRM, - const MachineLoopInfo &MLI, - const MachineBlockFrequencyInfo &MBFI, - VirtRegAuxInfo::NormalizingFn norm = - normalizeSpillWeight); + /// Compute spill weights and allocation hints for all virtual register + /// live intervals. + void calculateSpillWeightsAndHints(); + }; } // end namespace llvm #endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 75cf6a63dc9a7..4f59e08637628 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -28,22 +28,16 @@ using namespace llvm; #define DEBUG_TYPE "calcspillweights" -void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, - MachineFunction &MF, - VirtRegMap *VRM, - const MachineLoopInfo &MLI, - const MachineBlockFrequencyInfo &MBFI, - VirtRegAuxInfo::NormalizingFn norm) { +void VirtRegAuxInfo::calculateSpillWeightsAndHints() { LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); - for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; - VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); + calculateSpillWeightAndHint(LIS.getInterval(Reg)); } } diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 0fa50d97fb22a..8bbbbeb78236c 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -311,10 +311,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis(), getAnalysis(), getAnalysis()); - - calculateSpillWeightsAndHints(*LIS, *MF, VRM, - getAnalysis(), - getAnalysis()); + VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis(), + getAnalysis()); + VRAI.calculateSpillWeightsAndHints(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index eb0a096b9b4be..c1595391eca10 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -172,6 +172,7 @@ class RAGreedy : public MachineFunctionPass, std::unique_ptr SpillerInstance; PQueue Queue; unsigned NextCascade; + std::unique_ptr VRAI; // Live ranges pass through a number of stages as we try to allocate them. // Some of the stages may also create new live ranges: @@ -1507,10 +1508,9 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, // Now, check to see if the local interval we will create is going to be // expensive enough to evict somebody If so, this may cause a bad eviction // chain. - VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis(), *MBFI); float splitArtifactWeight = - VRAI.futureWeight(LIS->getInterval(Evictee), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + VRAI->futureWeight(LIS->getInterval(Evictee), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) return false; @@ -1550,10 +1550,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, // Have we found an interval that can be evicted? if (FutureEvictedPhysReg) { - VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis(), *MBFI); float splitArtifactWeight = - VRAI.futureWeight(LIS->getInterval(VirtRegToSplit), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + VRAI->futureWeight(LIS->getInterval(VirtRegToSplit), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); // Will the weight of the local interval be higher than the cheapest evictee // weight? If so it will evict it and will not cause a spill. if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight) @@ -3228,7 +3227,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); + VRAI = std::make_unique(*MF, *LIS, VRM, *Loops, *MBFI); + + VRAI->calculateSpillWeightsAndHints(); LLVM_DEBUG(LIS->dump()); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 0f848f62f7d1e..eb5dec51c8d0d 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -792,8 +792,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { VirtRegMap &VRM = getAnalysis(); - calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis(), - MBFI, normalizePBQPSpillWeight); + VirtRegAuxInfo VRAI(MF, LIS, &VRM, getAnalysis(), MBFI, + normalizePBQPSpillWeight); + VRAI.calculateSpillWeightsAndHints(); std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); From 05ae04c396519cca9ef50d3b9cafb0cd9c87d1d7 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 30 Sep 2020 17:10:44 +0200 Subject: [PATCH 175/544] [DA][SDA] SyncDependenceAnalysis re-write This patch achieves two things: 1. It breaks up the `join_blocks` interface between the SDA to the DA to return two separate sets for divergent loops exits and divergent, disjoint path joins. 2. It updates the SDA algorithm to run in O(n) time and improves the precision on divergent loop exits. This fixes `https://bugs.llvm.org/show_bug.cgi?id=46372` (by virtue of the improved `join_blocks` interface) and revealed an imprecise expected result in the `Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll` test. Reviewed By: sameerds Differential Revision: https://reviews.llvm.org/D84413 --- .../llvm/Analysis/DivergenceAnalysis.h | 83 ++-- .../llvm/Analysis/SyncDependenceAnalysis.h | 42 +- llvm/lib/Analysis/DivergenceAnalysis.cpp | 332 +++++-------- llvm/lib/Analysis/SyncDependenceAnalysis.cpp | 462 +++++++++++------- .../AMDGPU/hidden_loopdiverge.ll | 3 +- .../AMDGPU/trivial-join-at-loop-exit.ll | 3 - 6 files changed, 455 insertions(+), 470 deletions(-) diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index a2da97bb9059e..8a32bfbcc758f 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -59,8 +59,10 @@ class DivergenceAnalysis { /// \brief Mark \p UniVal as a value that is always uniform. void addUniformOverride(const Value &UniVal); - /// \brief Mark \p DivVal as a value that is always divergent. - void markDivergent(const Value &DivVal); + /// \brief Mark \p DivVal as a value that is always divergent. Will not do so + /// if `isAlwaysUniform(DivVal)`. + /// \returns Whether the tracked divergence state of \p DivVal changed. + bool markDivergent(const Value &DivVal); /// \brief Propagate divergence to all instructions in the region. /// Divergence is seeded by calls to \p markDivergent. @@ -76,45 +78,38 @@ class DivergenceAnalysis { /// \brief Whether \p Val is divergent at its definition. bool isDivergent(const Value &Val) const; - /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent. + /// \brief Whether \p U is divergent. Uses of a uniform value can be + /// divergent. bool isDivergentUse(const Use &U) const; void print(raw_ostream &OS, const Module *) const; private: - bool updateTerminator(const Instruction &Term) const; - bool updatePHINode(const PHINode &Phi) const; - - /// \brief Computes whether \p Inst is divergent based on the - /// divergence of its operands. - /// - /// \returns Whether \p Inst is divergent. - /// - /// This should only be called for non-phi, non-terminator instructions. - bool updateNormalInstruction(const Instruction &Inst) const; - - /// \brief Mark users of live-out users as divergent. - /// - /// \param LoopHeader the header of the divergent loop. - /// - /// Marks all users of live-out values of the loop headed by \p LoopHeader - /// as divergent and puts them on the worklist. - void taintLoopLiveOuts(const BasicBlock &LoopHeader); - - /// \brief Push all users of \p Val (in the region) to the worklist + /// \brief Mark \p Term as divergent and push all Instructions that become + /// divergent as a result on the worklist. + void analyzeControlDivergence(const Instruction &Term); + /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on + /// the worklist. + void taintAndPushPhiNodes(const BasicBlock &JoinBlock); + + /// \brief Identify all Instructions that become divergent because \p DivExit + /// is a divergent loop exit of \p DivLoop. Mark those instructions as + /// divergent and push them on the worklist. + void propagateLoopExitDivergence(const BasicBlock &DivExit, + const Loop &DivLoop); + + /// \brief Internal implementation function for propagateLoopExitDivergence. + void analyzeLoopExitDivergence(const BasicBlock &DivExit, + const Loop &OuterDivLoop); + + /// \brief Mark all instruction as divergent that use a value defined in \p + /// OuterDivLoop. Push their users on the worklist. + void analyzeTemporalDivergence(const Instruction &I, + const Loop &OuterDivLoop); + + /// \brief Push all users of \p Val (in the region) to the worklist. void pushUsers(const Value &I); - /// \brief Push all phi nodes in @block to the worklist - void pushPHINodes(const BasicBlock &Block); - - /// \brief Mark \p Block as join divergent - /// - /// A block is join divergent if two threads may reach it from different - /// incoming blocks at the same time. - void markBlockJoinDivergent(const BasicBlock &Block) { - DivergentJoinBlocks.insert(&Block); - } - /// \brief Whether \p Val is divergent when read in \p ObservingBlock. bool isTemporalDivergent(const BasicBlock &ObservingBlock, const Value &Val) const; @@ -126,24 +121,6 @@ class DivergenceAnalysis { return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end(); } - /// \brief Propagate control-induced divergence to users (phi nodes and - /// instructions). - // - // \param JoinBlock is a divergent loop exit or join point of two disjoint - // paths. - // \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop. - bool propagateJoinDivergence(const BasicBlock &JoinBlock, - const Loop *TermLoop); - - /// \brief Propagate induced value divergence due to control divergence in \p - /// Term. - void propagateBranchDivergence(const Instruction &Term); - - /// \brief Propagate divergent caused by a divergent loop exit. - /// - /// \param ExitingLoop is a divergent loop. - void propagateLoopDivergence(const Loop &ExitingLoop); - private: const Function &F; // If regionLoop != nullptr, analysis is only performed within \p RegionLoop. @@ -166,7 +143,7 @@ class DivergenceAnalysis { DenseSet UniformOverrides; // Blocks with joining divergent control from different predecessors. - DenseSet DivergentJoinBlocks; + DenseSet DivergentJoinBlocks; // FIXME Deprecated // Detected/marked divergent values. DenseSet DivergentValues; diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h index 2f07b3135308f..9838d629e93eb 100644 --- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" #include +#include namespace llvm { @@ -30,6 +31,26 @@ class Loop; class PostDominatorTree; using ConstBlockSet = SmallPtrSet; +struct ControlDivergenceDesc { + // Join points of divergent disjoint paths. + ConstBlockSet JoinDivBlocks; + // Divergent loop exits + ConstBlockSet LoopDivBlocks; +}; + +struct ModifiedPO { + std::vector LoopPO; + std::unordered_map POIndex; + void appendBlock(const BasicBlock &BB) { + POIndex[&BB] = LoopPO.size(); + LoopPO.push_back(&BB); + } + unsigned getIndexOf(const BasicBlock &BB) const { + return POIndex.find(&BB)->second; + } + unsigned size() const { return LoopPO.size(); } + const BasicBlock *getBlockAt(unsigned Idx) const { return LoopPO[Idx]; } +}; /// \brief Relates points of divergent control to join points in /// reducible CFGs. @@ -51,28 +72,19 @@ class SyncDependenceAnalysis { /// header. Those exit blocks are added to the returned set. /// If L is the parent loop of \p Term and an exit of L is in the returned /// set then L is a divergent loop. - const ConstBlockSet &join_blocks(const Instruction &Term); - - /// \brief Computes divergent join points and loop exits (in the surrounding - /// loop) caused by the divergent loop exits of\p Loop. - /// - /// The set of blocks which are reachable by disjoint paths from the - /// loop exits of \p Loop. - /// This treats the loop as a single node in \p Loop's parent loop. - /// The returned set has the same properties as for join_blocks(TermInst&). - const ConstBlockSet &join_blocks(const Loop &Loop); + const ControlDivergenceDesc &getJoinBlocks(const Instruction &Term); private: - static ConstBlockSet EmptyBlockSet; + static ControlDivergenceDesc EmptyDivergenceDesc; + + ModifiedPO LoopPO; - ReversePostOrderTraversal FuncRPOT; const DominatorTree &DT; const PostDominatorTree &PDT; const LoopInfo &LI; - std::map> CachedLoopExitJoins; - std::map> - CachedBranchJoins; + std::map> + CachedControlDivDescs; }; } // namespace llvm diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 343406c9bba16..d01a0b95612cc 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// +//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -97,42 +97,18 @@ DivergenceAnalysis::DivergenceAnalysis( : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA), IsLCSSAForm(IsLCSSAForm) {} -void DivergenceAnalysis::markDivergent(const Value &DivVal) { +bool DivergenceAnalysis::markDivergent(const Value &DivVal) { + if (isAlwaysUniform(DivVal)) + return false; assert(isa(DivVal) || isa(DivVal)); assert(!isAlwaysUniform(DivVal) && "cannot be a divergent"); - DivergentValues.insert(&DivVal); + return DivergentValues.insert(&DivVal).second; } void DivergenceAnalysis::addUniformOverride(const Value &UniVal) { UniformOverrides.insert(&UniVal); } -bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const { - if (Term.getNumSuccessors() <= 1) - return false; - if (auto *BranchTerm = dyn_cast(&Term)) { - assert(BranchTerm->isConditional()); - return isDivergent(*BranchTerm->getCondition()); - } - if (auto *SwitchTerm = dyn_cast(&Term)) { - return isDivergent(*SwitchTerm->getCondition()); - } - if (isa(Term)) { - return false; // ignore abnormal executions through landingpad - } - - llvm_unreachable("unexpected terminator"); -} - -bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const { - // TODO function calls with side effects, etc - for (const auto &Op : I.operands()) { - if (isDivergent(*Op)) - return true; - } - return false; -} - bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock, const Value &Val) const { const auto *Inst = dyn_cast(&Val); @@ -150,32 +126,6 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock, return false; } -bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const { - // joining divergent disjoint path in Phi parent block - if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) { - return true; - } - - // An incoming value could be divergent by itself. - // Otherwise, an incoming value could be uniform within the loop - // that carries its definition but it may appear divergent - // from outside the loop. This happens when divergent loop exits - // drop definitions of that uniform value in different iterations. - // - // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop - // if (i % thread_id == 0) break; // divergent loop exit - // } - // int divI = i; // divI is divergent - for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) { - const auto *InVal = Phi.getIncomingValue(i); - if (isDivergent(*Phi.getIncomingValue(i)) || - isTemporalDivergent(*Phi.getParent(), *InVal)) { - return true; - } - } - return false; -} - bool DivergenceAnalysis::inRegion(const Instruction &I) const { return I.getParent() && inRegion(*I.getParent()); } @@ -184,35 +134,82 @@ bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const { return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB); } -static bool usesLiveOut(const Instruction &I, const Loop *DivLoop) { - for (auto &Op : I.operands()) { - auto *OpInst = dyn_cast(&Op); +void DivergenceAnalysis::pushUsers(const Value &V) { + const auto *I = dyn_cast(&V); + + if (I && I->isTerminator()) { + analyzeControlDivergence(*I); + return; + } + + for (const auto *User : V.users()) { + const auto *UserInst = dyn_cast(User); + if (!UserInst) + continue; + + // only compute divergent inside loop + if (!inRegion(*UserInst)) + continue; + + // All users of divergent values are immediate divergent + if (markDivergent(*UserInst)) + Worklist.push_back(UserInst); + } +} + +static const Instruction *getIfCarriedInstruction(const Use &U, + const Loop &DivLoop) { + const auto *I = dyn_cast(&U); + if (!I) + return nullptr; + if (!DivLoop.contains(I)) + return nullptr; + return I; +} + +void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I, + const Loop &OuterDivLoop) { + if (isAlwaysUniform(I)) + return; + if (isDivergent(I)) + return; + + LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n"); + assert((isa(I) || !IsLCSSAForm) && + "In LCSSA form all users of loop-exiting defs are Phi nodes."); + for (const Use &Op : I.operands()) { + const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop); if (!OpInst) continue; - if (DivLoop->contains(OpInst->getParent())) - return true; + if (markDivergent(I)) + pushUsers(I); + return; } - return false; } // marks all users of loop-carried values of the loop headed by LoopHeader as // divergent -void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) { - auto *DivLoop = LI.getLoopFor(&LoopHeader); - assert(DivLoop && "loopHeader is not actually part of a loop"); +void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, + const Loop &OuterDivLoop) { + // All users are in immediate exit blocks + if (IsLCSSAForm) { + for (const auto &Phi : DivExit.phis()) { + analyzeTemporalDivergence(Phi, OuterDivLoop); + } + return; + } - SmallVector TaintStack; - DivLoop->getExitBlocks(TaintStack); + // For non-LCSSA we have to follow all live out edges wherever they may lead. + const BasicBlock &LoopHeader = *OuterDivLoop.getHeader(); + SmallVector TaintStack; + TaintStack.push_back(&DivExit); // Otherwise potential users of loop-carried values could be anywhere in the // dominance region of DivLoop (including its fringes for phi nodes) DenseSet Visited; - for (auto *Block : TaintStack) { - Visited.insert(Block); - } - Visited.insert(&LoopHeader); + Visited.insert(&DivExit); - while (!TaintStack.empty()) { + do { auto *UserBlock = TaintStack.back(); TaintStack.pop_back(); @@ -220,33 +217,21 @@ void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) { if (!inRegion(*UserBlock)) continue; - assert(!DivLoop->contains(UserBlock) && + assert(!OuterDivLoop.contains(UserBlock) && "irreducible control flow detected"); // phi nodes at the fringes of the dominance region if (!DT.dominates(&LoopHeader, UserBlock)) { // all PHI nodes of UserBlock become divergent for (auto &Phi : UserBlock->phis()) { - Worklist.push_back(&Phi); + analyzeTemporalDivergence(Phi, OuterDivLoop); } continue; } - // taint outside users of values carried by DivLoop + // Taint outside users of values carried by OuterDivLoop. for (auto &I : *UserBlock) { - if (isAlwaysUniform(I)) - continue; - if (isDivergent(I)) - continue; - if (!usesLiveOut(I, DivLoop)) - continue; - - markDivergent(I); - if (I.isTerminator()) { - propagateBranchDivergence(I); - } else { - pushUsers(I); - } + analyzeTemporalDivergence(I, OuterDivLoop); } // visit all blocks in the dominance region @@ -256,56 +241,57 @@ void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) { } TaintStack.push_back(SuccBlock); } - } + } while (!TaintStack.empty()); } -void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) { - for (const auto &Phi : Block.phis()) { - if (isDivergent(Phi)) - continue; - Worklist.push_back(&Phi); +void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit, + const Loop &InnerDivLoop) { + LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n"); + + // Find outer-most loop that does not contain \p DivExit + const Loop *DivLoop = &InnerDivLoop; + const Loop *OuterDivLoop = DivLoop; + const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit); + const unsigned LoopExitDepth = + ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0; + while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) { + DivergentLoops.insert(DivLoop); // all crossed loops are divergent + OuterDivLoop = DivLoop; + DivLoop = DivLoop->getParentLoop(); } -} - -void DivergenceAnalysis::pushUsers(const Value &V) { - for (const auto *User : V.users()) { - const auto *UserInst = dyn_cast(User); - if (!UserInst) - continue; - - if (isDivergent(*UserInst)) - continue; + LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName() + << "\n"); - // only compute divergent inside loop - if (!inRegion(*UserInst)) - continue; - Worklist.push_back(UserInst); - } + analyzeLoopExitDivergence(DivExit, *OuterDivLoop); } -bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock, - const Loop *BranchLoop) { - LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n"); +// this is a divergent join point - mark all phi nodes as divergent and push +// them onto the stack. +void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { + LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName() + << "\n"); // ignore divergence outside the region if (!inRegion(JoinBlock)) { - return false; + return; } // push non-divergent phi nodes in JoinBlock to the worklist - pushPHINodes(JoinBlock); - - // disjoint-paths divergent at JoinBlock - markBlockJoinDivergent(JoinBlock); - - // JoinBlock is a divergent loop exit - return BranchLoop && !BranchLoop->contains(&JoinBlock); + for (const auto &Phi : JoinBlock.phis()) { + if (isDivergent(Phi)) + continue; + // FIXME Theoretically ,the 'undef' value could be replaced by any other + // value causing spurious divergence. + if (Phi.hasConstantOrUndefValue()) + continue; + if (markDivergent(Phi)) + Worklist.push_back(&Phi); + } } -void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) { - LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n"); - - markDivergent(Term); +void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { + LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName() + << "\n"); // Don't propagate divergence from unreachable blocks. if (!DT.isReachableFromEntry(Term.getParent())) @@ -313,104 +299,36 @@ void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) { const auto *BranchLoop = LI.getLoopFor(Term.getParent()); - // whether there is a divergent loop exit from BranchLoop (if any) - bool IsBranchLoopDivergent = false; + const auto &DivDesc = SDA.getJoinBlocks(Term); - // iterate over all blocks reachable by disjoint from Term within the loop - // also iterates over loop exits that become divergent due to Term. - for (const auto *JoinBlock : SDA.join_blocks(Term)) { - IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop); + // Iterate over all blocks now reachable by a disjoint path join + for (const auto *JoinBlock : DivDesc.JoinDivBlocks) { + taintAndPushPhiNodes(*JoinBlock); } - // Branch loop is a divergent loop due to the divergent branch in Term - if (IsBranchLoopDivergent) { - assert(BranchLoop); - if (!DivergentLoops.insert(BranchLoop).second) { - return; - } - propagateLoopDivergence(*BranchLoop); - } -} - -void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) { - LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n"); - - // don't propagate beyond region - if (!inRegion(*ExitingLoop.getHeader())) - return; - - const auto *BranchLoop = ExitingLoop.getParentLoop(); - - // Uses of loop-carried values could occur anywhere - // within the dominance region of the definition. All loop-carried - // definitions are dominated by the loop header (reducible control). - // Thus all users have to be in the dominance region of the loop header, - // except PHI nodes that can also live at the fringes of the dom region - // (incoming defining value). - if (!IsLCSSAForm) - taintLoopLiveOuts(*ExitingLoop.getHeader()); - - // whether there is a divergent loop exit from BranchLoop (if any) - bool IsBranchLoopDivergent = false; - - // iterate over all blocks reachable by disjoint paths from exits of - // ExitingLoop also iterates over loop exits (of BranchLoop) that in turn - // become divergent. - for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) { - IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop); - } - - // Branch loop is a divergent due to divergent loop exit in ExitingLoop - if (IsBranchLoopDivergent) { - assert(BranchLoop); - if (!DivergentLoops.insert(BranchLoop).second) { - return; - } - propagateLoopDivergence(*BranchLoop); + assert(DivDesc.LoopDivBlocks.empty() || BranchLoop); + for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) { + propagateLoopExitDivergence(*DivExitBlock, *BranchLoop); } } void DivergenceAnalysis::compute() { - for (auto *DivVal : DivergentValues) { + // Initialize worklist. + auto DivValuesCopy = DivergentValues; + for (const auto *DivVal : DivValuesCopy) { + assert(isDivergent(*DivVal) && "Worklist invariant violated!"); pushUsers(*DivVal); } - // propagate divergence + // All values on the Worklist are divergent. + // Their users may not have been updated yed. while (!Worklist.empty()) { const Instruction &I = *Worklist.back(); Worklist.pop_back(); - // maintain uniformity of overrides - if (isAlwaysUniform(I)) - continue; - - bool WasDivergent = isDivergent(I); - if (WasDivergent) - continue; - - // propagate divergence caused by terminator - if (I.isTerminator()) { - if (updateTerminator(I)) { - // propagate control divergence to affected instructions - propagateBranchDivergence(I); - continue; - } - } - - // update divergence of I due to divergent operands - bool DivergentUpd = false; - const auto *Phi = dyn_cast(&I); - if (Phi) { - DivergentUpd = updatePHINode(*Phi); - } else { - DivergentUpd = updateNormalInstruction(I); - } - // propagate value divergence to users - if (DivergentUpd) { - markDivergent(I); - pushUsers(I); - } + assert(isDivergent(I) && "Worklist invariant violated!"); + pushUsers(I); } } @@ -444,7 +362,7 @@ GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI) - : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) { + : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) { for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { DA.markDivergent(I); diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index 36bef705d4f30..0771bb52c4f47 100644 --- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -1,4 +1,4 @@ -//==- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation -==// +//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -107,271 +107,353 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include #include #include #define DEBUG_TYPE "sync-dependence" +// The SDA algorithm operates on a modified CFG - we modify the edges leaving +// loop headers as follows: +// +// * We remove all edges leaving all loop headers. +// * We add additional edges from the loop headers to their exit blocks. +// +// The modification is virtual, that is whenever we visit a loop header we +// pretend it had different successors. +namespace { +using namespace llvm; + +// Custom Post-Order Traveral +// +// We cannot use the vanilla (R)PO computation of LLVM because: +// * We (virtually) modify the CFG. +// * We want a loop-compact block enumeration, that is the numbers assigned by +// the traveral to the blocks of a loop are an interval. +using POCB = std::function; +using VisitedSet = std::set; +using BlockStack = std::vector; + +// forward +static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, + VisitedSet &Finalized); + +// for a nested region (top-level loop or nested loop) +static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop, + POCB CallBack, VisitedSet &Finalized) { + const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr; + while (!Stack.empty()) { + const auto *NextBB = Stack.back(); + + auto *NestedLoop = LI.getLoopFor(NextBB); + bool IsNestedLoop = NestedLoop != Loop; + + // Treat the loop as a node + if (IsNestedLoop) { + SmallVector NestedExits; + NestedLoop->getUniqueExitBlocks(NestedExits); + bool PushedNodes = false; + for (const auto *NestedExitBB : NestedExits) { + if (NestedExitBB == LoopHeader) + continue; + if (Loop && !Loop->contains(NestedExitBB)) + continue; + if (Finalized.count(NestedExitBB)) + continue; + PushedNodes = true; + Stack.push_back(NestedExitBB); + } + if (!PushedNodes) { + // All loop exits finalized -> finish this node + Stack.pop_back(); + computeLoopPO(LI, *NestedLoop, CallBack, Finalized); + } + continue; + } + + // DAG-style + bool PushedNodes = false; + for (const auto *SuccBB : successors(NextBB)) { + if (SuccBB == LoopHeader) + continue; + if (Loop && !Loop->contains(SuccBB)) + continue; + if (Finalized.count(SuccBB)) + continue; + PushedNodes = true; + Stack.push_back(SuccBB); + } + if (!PushedNodes) { + // Never push nodes twice + Stack.pop_back(); + if (!Finalized.insert(NextBB).second) + continue; + CallBack(*NextBB); + } + } +} + +static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) { + VisitedSet Finalized; + BlockStack Stack; + Stack.reserve(24); // FIXME made-up number + Stack.push_back(&F.getEntryBlock()); + computeStackPO(Stack, LI, nullptr, CallBack, Finalized); +} + +static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, + VisitedSet &Finalized) { + /// Call CallBack on all loop blocks. + std::vector Stack; + const auto *LoopHeader = Loop.getHeader(); + + // Visit the header last + Finalized.insert(LoopHeader); + CallBack(*LoopHeader); + + // Initialize with immediate successors + for (const auto *BB : successors(LoopHeader)) { + if (!Loop.contains(BB)) + continue; + if (BB == LoopHeader) + continue; + Stack.push_back(BB); + } + + // Compute PO inside region + computeStackPO(Stack, LI, &Loop, CallBack, Finalized); +} + +} // namespace + namespace llvm { -ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet; +ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc; SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI) - : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {} + : DT(DT), PDT(PDT), LI(LI) { + computeTopLevelPO(*DT.getRoot()->getParent(), LI, + [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); }); +} SyncDependenceAnalysis::~SyncDependenceAnalysis() {} -using FunctionRPOT = ReversePostOrderTraversal; - // divergence propagator for reducible CFGs struct DivergencePropagator { - const FunctionRPOT &FuncRPOT; + const ModifiedPO &LoopPOT; const DominatorTree &DT; const PostDominatorTree &PDT; const LoopInfo &LI; - - // identified join points - std::unique_ptr JoinBlocks; - - // reached loop exits (by a path disjoint to a path to the loop header) - SmallPtrSet ReachedLoopExits; - - // if DefMap[B] == C then C is the dominating definition at block B - // if DefMap[B] ~ undef then we haven't seen B yet - // if DefMap[B] == B then B is a join point of disjoint paths from X or B is - // an immediate successor of X (initial value). - using DefiningBlockMap = std::map; - DefiningBlockMap DefMap; - - // all blocks with pending visits - std::unordered_set PendingUpdates; - - DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI) - : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI), - JoinBlocks(new ConstBlockSet) {} - - // set the definition at @block and mark @block as pending for a visit - void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) { - bool WasAdded = DefMap.emplace(&Block, &DefBlock).second; - if (WasAdded) - PendingUpdates.insert(&Block); - } + const BasicBlock &DivTermBlock; + + // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at + // block B + // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet + // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths + // from X or B is an immediate successor of X (initial value). + using BlockLabelVec = std::vector; + BlockLabelVec BlockLabels; + // divergent join and loop exit descriptor. + std::unique_ptr DivDesc; + + DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const BasicBlock &DivTermBlock) + : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock), + BlockLabels(LoopPOT.size(), nullptr), + DivDesc(new ControlDivergenceDesc) {} void printDefs(raw_ostream &Out) { - Out << "Propagator::DefMap {\n"; - for (const auto *Block : FuncRPOT) { - auto It = DefMap.find(Block); - Out << Block->getName() << " : "; - if (It == DefMap.end()) { - Out << "\n"; + Out << "Propagator::BlockLabels {\n"; + for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) { + const auto *Label = BlockLabels[BlockIdx]; + Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx + << ") : "; + if (!Label) { + Out << "\n"; } else { - const auto *DefBlock = It->second; - Out << (DefBlock ? DefBlock->getName() : "") << "\n"; + Out << Label->getName() << "\n"; } } Out << "}\n"; } - // process @succBlock with reaching definition @defBlock - // the original divergent branch was in @parentLoop (if any) - void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop, - const BasicBlock &DefBlock) { + // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this + // causes a divergent join. + bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) { + auto SuccIdx = LoopPOT.getIndexOf(SuccBlock); - // @succBlock is a loop exit - if (ParentLoop && !ParentLoop->contains(&SuccBlock)) { - DefMap.emplace(&SuccBlock, &DefBlock); - ReachedLoopExits.insert(&SuccBlock); - return; + // unset or same reaching label + const auto *OldLabel = BlockLabels[SuccIdx]; + if (!OldLabel || (OldLabel == &PushedLabel)) { + BlockLabels[SuccIdx] = &PushedLabel; + return false; } - // first reaching def? - auto ItLastDef = DefMap.find(&SuccBlock); - if (ItLastDef == DefMap.end()) { - addPending(SuccBlock, DefBlock); - return; - } + // Update the definition + BlockLabels[SuccIdx] = &SuccBlock; + return true; + } - // a join of at least two definitions - if (ItLastDef->second != &DefBlock) { - // do we know this join already? - if (!JoinBlocks->insert(&SuccBlock).second) - return; + // visiting a virtual loop exit edge from the loop header --> temporal + // divergence on join + bool visitLoopExitEdge(const BasicBlock &ExitBlock, + const BasicBlock &DefBlock, bool FromParentLoop) { + // Pushing from a non-parent loop cannot cause temporal divergence. + if (!FromParentLoop) + return visitEdge(ExitBlock, DefBlock); - // update the definition - addPending(SuccBlock, SuccBlock); - } + if (!computeJoin(ExitBlock, DefBlock)) + return false; + + // Identified a divergent loop exit + DivDesc->LoopDivBlocks.insert(&ExitBlock); + LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName() + << "\n"); + return true; } - // find all blocks reachable by two disjoint paths from @rootTerm. - // This method works for both divergent terminators and loops with - // divergent exits. - // @rootBlock is either the block containing the branch or the header of the - // divergent loop. - // @nodeSuccessors is the set of successors of the node (Loop or Terminator) - // headed by @rootBlock. - // @parentLoop is the parent loop of the Loop or the loop that contains the - // Terminator. - template - std::unique_ptr - computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { - assert(JoinBlocks); - - LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " - << (ParentLoop ? ParentLoop->getName() : "") + // process \p SuccBlock with reaching definition \p DefBlock + bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) { + if (!computeJoin(SuccBlock, DefBlock)) + return false; + + // Divergent, disjoint paths join. + DivDesc->JoinDivBlocks.insert(&SuccBlock); + LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName()); + return true; + } + + std::unique_ptr computeJoinPoints() { + assert(DivDesc); + + LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName() << "\n"); + const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock); + + // Early stopping criterion + int FloorIdx = LoopPOT.size() - 1; + const BasicBlock *FloorLabel = nullptr; + // bootstrap with branch targets - for (const auto *SuccBlock : NodeSuccessors) { - DefMap.emplace(SuccBlock, SuccBlock); + int BlockIdx = 0; - if (ParentLoop && !ParentLoop->contains(SuccBlock)) { - // immediate loop exit from node. - ReachedLoopExits.insert(SuccBlock); - } else { - // regular successor - PendingUpdates.insert(SuccBlock); - } - } + for (const auto *SuccBlock : successors(&DivTermBlock)) { + auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock); + BlockLabels[SuccIdx] = SuccBlock; - LLVM_DEBUG(dbgs() << "SDA: rpo order:\n"; for (const auto *RpoBlock - : FuncRPOT) { - dbgs() << "- " << RpoBlock->getName() << "\n"; - }); + // Find the successor with the highest index to start with + BlockIdx = std::max(BlockIdx, SuccIdx); + FloorIdx = std::min(FloorIdx, SuccIdx); - auto ItBeginRPO = FuncRPOT.begin(); - auto ItEndRPO = FuncRPOT.end(); + // Identify immediate divergent loop exits + if (!DivBlockLoop) + continue; - // skip until term (TODO RPOT won't let us start at @term directly) - for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) { - assert(ItBeginRPO != ItEndRPO && "Unable to find RootBlock"); + const auto *BlockLoop = LI.getLoopFor(SuccBlock); + if (BlockLoop && DivBlockLoop->contains(BlockLoop)) + continue; + DivDesc->LoopDivBlocks.insert(SuccBlock); + LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: " + << SuccBlock->getName() << "\n"); } // propagate definitions at the immediate successors of the node in RPO - auto ItBlockRPO = ItBeginRPO; - while ((++ItBlockRPO != ItEndRPO) && !PendingUpdates.empty()) { - const auto *Block = *ItBlockRPO; - LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); + for (; BlockIdx >= FloorIdx; --BlockIdx) { + LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs())); - // skip Block if not pending update - auto ItPending = PendingUpdates.find(Block); - if (ItPending == PendingUpdates.end()) + // Any label available here + const auto *Label = BlockLabels[BlockIdx]; + if (!Label) continue; - PendingUpdates.erase(ItPending); - // propagate definition at Block to its successors - auto ItDef = DefMap.find(Block); - const auto *DefBlock = ItDef->second; - assert(DefBlock); + // Ok. Get the block + const auto *Block = LoopPOT.getBlockAt(BlockIdx); + LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); auto *BlockLoop = LI.getLoopFor(Block); - if (ParentLoop && - (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) { - // if the successor is the header of a nested loop pretend its a - // single node with the loop's exits as successors + bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block; + bool CausedJoin = false; + int LoweredFloorIdx = FloorIdx; + if (IsLoopHeader) { + // Disconnect from immediate successors and propagate directly to loop + // exits. SmallVector BlockLoopExits; BlockLoop->getExitBlocks(BlockLoopExits); + + bool IsParentLoop = BlockLoop->contains(&DivTermBlock); for (const auto *BlockLoopExit : BlockLoopExits) { - visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock); + CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop); + LoweredFloorIdx = std::min(LoweredFloorIdx, + LoopPOT.getIndexOf(*BlockLoopExit)); } - } else { - // the successors are either on the same loop level or loop exits + // Acyclic successor case for (const auto *SuccBlock : successors(Block)) { - visitSuccessor(*SuccBlock, ParentLoop, *DefBlock); + CausedJoin |= visitEdge(*SuccBlock, *Label); + LoweredFloorIdx = + std::min(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock)); } } - } - LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); - - // We need to know the definition at the parent loop header to decide - // whether the definition at the header is different from the definition at - // the loop exits, which would indicate a divergent loop exits. - // - // A // loop header - // | - // B // nested loop header - // | - // C -> X (exit from B loop) -..-> (A latch) - // | - // D -> back to B (B latch) - // | - // proper exit from both loops - // - // analyze reached loop exits - if (!ReachedLoopExits.empty()) { - const BasicBlock *ParentLoopHeader = - ParentLoop ? ParentLoop->getHeader() : nullptr; - - assert(ParentLoop); - auto ItHeaderDef = DefMap.find(ParentLoopHeader); - const auto *HeaderDefBlock = - (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second; - - LLVM_DEBUG(printDefs(dbgs())); - assert(HeaderDefBlock && "no definition at header of carrying loop"); - - for (const auto *ExitBlock : ReachedLoopExits) { - auto ItExitDef = DefMap.find(ExitBlock); - assert((ItExitDef != DefMap.end()) && - "no reaching def at reachable loop exit"); - if (ItExitDef->second != HeaderDefBlock) { - JoinBlocks->insert(ExitBlock); - } + // Floor update + if (CausedJoin) { + // 1. Different labels pushed to successors + FloorIdx = LoweredFloorIdx; + } else if (FloorLabel != Label) { + // 2. No join caused BUT we pushed a label that is different than the + // last pushed label + FloorIdx = LoweredFloorIdx; + FloorLabel = Label; } } - return std::move(JoinBlocks); + LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); + + return std::move(DivDesc); } }; -const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) { - using LoopExitVec = SmallVector; - LoopExitVec LoopExits; - Loop.getExitBlocks(LoopExits); - if (LoopExits.size() < 1) { - return EmptyBlockSet; +static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) { + Out << "["; + bool First = true; + for (const auto *BB : Blocks) { + if (!First) + Out << ", "; + First = false; + Out << BB->getName(); } - - // already available in cache? - auto ItCached = CachedLoopExitJoins.find(&Loop); - if (ItCached != CachedLoopExitJoins.end()) { - return *ItCached->second; - } - - // compute all join points - DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; - auto JoinBlocks = Propagator.computeJoinPoints( - *Loop.getHeader(), LoopExits, Loop.getParentLoop()); - - auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); - assert(ItInserted.second); - return *ItInserted.first->second; + Out << "]"; } -const ConstBlockSet & -SyncDependenceAnalysis::join_blocks(const Instruction &Term) { +const ControlDivergenceDesc & +SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) { // trivial case - if (Term.getNumSuccessors() < 1) { - return EmptyBlockSet; + if (Term.getNumSuccessors() <= 1) { + return EmptyDivergenceDesc; } // already available in cache? - auto ItCached = CachedBranchJoins.find(&Term); - if (ItCached != CachedBranchJoins.end()) + auto ItCached = CachedControlDivDescs.find(&Term); + if (ItCached != CachedControlDivDescs.end()) return *ItCached->second; // compute all join points - DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; + // Special handling of divergent loop exits is not needed for LCSSA const auto &TermBlock = *Term.getParent(); - auto JoinBlocks = Propagator.computeJoinPoints( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); + DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock); + auto DivDesc = Propagator.computeJoinPoints(); + + LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n"; + dbgs() << "JoinDivBlocks: "; + printBlockSet(DivDesc->JoinDivBlocks, dbgs()); + dbgs() << "\nLoopDivBlocks: "; + printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";); - auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); + auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc)); assert(ItInserted.second); return *ItInserted.first->second; } diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll index 12e2b0ffd4438..774e995c7ca20 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll @@ -119,9 +119,8 @@ L: br i1 %uni.cond, label %D, label %G X: - %div.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %B ] ; temporal divergent phi + %uni.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %B ] br i1 %uni.cond, label %Y, label %exit -; CHECK: DIVERGENT: %div.merge.x = Y: %div.merge.y = phi i32 [ 42, %X ], [ %b, %C ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll index 8ad848af41f57..b872dd8966bc9 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll @@ -1,7 +1,4 @@ ; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s -; XFAIL: * - -; https://bugs.llvm.org/show_bug.cgi?id=46372 ; CHECK: bb2: ; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ] From 1034262e0a38f0bd755e68aa41b6bb856ebd2eb8 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Mon, 21 Sep 2020 17:15:26 +0900 Subject: [PATCH 176/544] [VE] Support TargetBlockAddress Change to handle TargetBlockAddress and add a regression test for it. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D88576 --- llvm/lib/Target/VE/VEInstrInfo.td | 8 ++++++++ llvm/lib/Target/VE/VEMCInstLower.cpp | 3 +++ llvm/test/CodeGen/VE/blockaddress.ll | 25 +++++++++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 llvm/test/CodeGen/VE/blockaddress.ll diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 6cf84af0401f5..e4270b9c3652a 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -1661,6 +1661,14 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)), (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0, (tglobaladdr:$in1))>; +// Address calculation and its optimization +def : Pat<(VEhi tblockaddress:$in), (LEASLzii 0, 0, tblockaddress:$in)>; +def : Pat<(VElo tblockaddress:$in), + (ANDrm (LEAzii 0, 0, tblockaddress:$in), !add(32, 64))>; +def : Pat<(add (VEhi tblockaddress:$in1), (VElo tblockaddress:$in2)), + (LEASLrii (ANDrm (LEAzii 0, 0, tblockaddress:$in2), !add(32, 64)), 0, + (tblockaddress:$in1))>; + // GlobalTLS address calculation and its optimization def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>; def : Pat<(VElo tglobaltlsaddr:$in), diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp index f59cd394de380..c14121d9e18ad 100644 --- a/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -51,6 +51,9 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, break; return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand( + MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP); case MachineOperand::MO_ConstantPoolIndex: return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP); case MachineOperand::MO_ExternalSymbol: diff --git a/llvm/test/CodeGen/VE/blockaddress.ll b/llvm/test/CodeGen/VE/blockaddress.ll new file mode 100644 index 0000000000000..ac4a35e63427f --- /dev/null +++ b/llvm/test/CodeGen/VE/blockaddress.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s + +@addr = global i8* null, align 8 + +; Function Attrs: nofree norecurse nounwind writeonly +define void @test() { +; CHECK-LABEL: test: +; CHECK: .LBB0_3: # %entry +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: # %bb.1: # %test1 +; CHECK-NEXT: lea %s0, addr@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, addr@hi(, %s0) +; CHECK-NEXT: lea %s1, .Ltmp0@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, .Ltmp0@hi(, %s1) +; CHECK-NEXT: st %s1, (, %s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + br label %test1 + +test1: + store i8* blockaddress(@test, %test1), i8** @addr, align 8 + ret void +} From 4fbd83c716dbc1d68e0aac5d71d201b664762489 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Sat, 26 Sep 2020 00:18:42 -0700 Subject: [PATCH 177/544] [ObjCARCAA][NewPM] Add already ported objc-arc-aa to PassRegistry.def Also add missing AnalysisKey definition. --- llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 2 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/test/Transforms/ObjCARC/gvn.ll | 1 + 4 files changed, 5 insertions(+) diff --git a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp index 58eb5281a717d..2cb43d7682308 100644 --- a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -133,6 +133,8 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call, return AAResultBase::getModRefInfo(Call, Loc, AAQI); } +AnalysisKey ObjCARCAA::Key; + ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) { return ObjCARCAAResult(F.getParent()->getDataLayout()); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index a0206d075de91..1ea73195740c1 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/PostDominators.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 1cc045885f9b4..add685dbdacc2 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -167,6 +167,7 @@ FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) FUNCTION_ALIAS_ANALYSIS("basic-aa", BasicAA()) FUNCTION_ALIAS_ANALYSIS("cfl-anders-aa", CFLAndersAA()) FUNCTION_ALIAS_ANALYSIS("cfl-steens-aa", CFLSteensAA()) +FUNCTION_ALIAS_ANALYSIS("objc-arc-aa", objcarc::ObjCARCAA()) FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA()) FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA()) FUNCTION_ALIAS_ANALYSIS("tbaa", TypeBasedAA()) diff --git a/llvm/test/Transforms/ObjCARC/gvn.ll b/llvm/test/Transforms/ObjCARC/gvn.ll index fb9a71f14727b..27fb8e4db74e5 100644 --- a/llvm/test/Transforms/ObjCARC/gvn.ll +++ b/llvm/test/Transforms/ObjCARC/gvn.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -basic-aa -objc-arc-aa -gvn < %s | FileCheck %s +; RUN: opt -S -aa-pipeline=basic-aa,objc-arc-aa -passes=gvn < %s | FileCheck %s @x = common global i8* null, align 8 From dd4fb7c8cfe394a3290bd19a1eac03435472ccfa Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 30 Sep 2020 12:23:06 -0400 Subject: [PATCH 178/544] [mlir][openacc] Remove -allow-unregistred-dialect from ops and invalid tests Switch to a dummy op in the test dialect so we can remove the -allow-unregistred-dialect on ops.mlir and invalid.mlir. Change after comment on D88272. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D88587 --- mlir/test/Dialect/OpenACC/invalid.mlir | 18 ++++---- mlir/test/Dialect/OpenACC/ops.mlir | 62 +++++++++++++------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index c56ccdb186f94..be0b0ce611524 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -verify-diagnostics %s +// RUN: mlir-opt -split-input-file -verify-diagnostics %s // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop gang { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -10,7 +10,7 @@ acc.loop gang { // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop worker { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -18,7 +18,7 @@ acc.loop worker { // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop vector { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -26,7 +26,7 @@ acc.loop vector { // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop gang worker { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -34,7 +34,7 @@ acc.loop gang worker { // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop gang vector { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -42,7 +42,7 @@ acc.loop gang vector { // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop worker vector { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -50,7 +50,7 @@ acc.loop worker vector { // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} acc.loop gang worker vector { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } attributes {seq} @@ -147,7 +147,7 @@ acc.loop { // ----- acc.loop { - "some.op"() ({ + "test.openacc_dummy_op"() ({ // expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}} acc.shutdown }) : () -> () diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 7ed4340fa3088..e24b57245e1de 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -split-input-file -allow-unregistered-dialect %s | FileCheck %s +// RUN: mlir-opt -split-input-file %s | FileCheck %s // Verify the printed output can be parsed. -// RUN: mlir-opt -split-input-file -allow-unregistered-dialect %s | mlir-opt -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt -split-input-file %s | mlir-opt -allow-unregistered-dialect | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -mlir-print-op-generic %s | mlir-opt -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt -split-input-file -mlir-print-op-generic %s | mlir-opt -allow-unregistered-dialect | FileCheck %s func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x10xf32>) -> memref<10x10xf32> { %c0 = constant 0 : index @@ -203,59 +203,59 @@ func @testloopop() -> () { %idxValue = constant 8 : index acc.loop gang worker vector { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop gang(num=%i64Value: i64) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop gang(static=%i64Value: i64) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop worker(%i64Value: i64) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop worker(%i32Value: i32) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop worker(%idxValue: index) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop vector(%i64Value: i64) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop vector(%i32Value: i32) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop vector(%idxValue: index) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop gang(num=%i64Value: i64) worker vector { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop gang(num=%i64Value: i64, static=%i64Value: i64) worker(%i64Value: i64) vector(%i64Value: i64) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop gang(num=%i32Value: i32, static=%idxValue: index) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop tile(%i64Value: i64, %i64Value: i64) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } acc.loop tile(%i32Value: i32, %i32Value: i32) { - "some.op"() : () -> () + "test.openacc_dummy_op"() : () -> () acc.yield } return @@ -265,59 +265,59 @@ func @testloopop() -> () { // CHECK-NEXT: [[I32VALUE:%.*]] = constant 128 : i32 // CHECK-NEXT: [[IDXVALUE:%.*]] = constant 8 : index // CHECK: acc.loop gang worker vector { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop gang(num=[[I64VALUE]]: i64) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop gang(static=[[I64VALUE]]: i64) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop worker([[I64VALUE]]: i64) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop worker([[I32VALUE]]: i32) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop worker([[IDXVALUE]]: index) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop vector([[I64VALUE]]: i64) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop vector([[I32VALUE]]: i32) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop vector([[IDXVALUE]]: index) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop gang(num=[[I64VALUE]]: i64) worker vector { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop gang(num=[[I64VALUE]]: i64, static=[[I64VALUE]]: i64) worker([[I64VALUE]]: i64) vector([[I64VALUE]]: i64) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop gang(num=[[I32VALUE]]: i32, static=[[IDXVALUE]]: index) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop tile([[I64VALUE]]: i64, [[I64VALUE]]: i64) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK: acc.loop tile([[I32VALUE]]: i32, [[I32VALUE]]: i32) { -// CHECK-NEXT: "some.op"() : () -> () +// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield // CHECK-NEXT: } From 4fcd1a8e6528ca42fe656f2745e15d2b7f5de495 Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Tue, 4 Aug 2020 18:34:22 -0400 Subject: [PATCH 179/544] [llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking. This is mostly for the benefit of the LBR latency mode. Right now, it performs no checking. If this is run on non-supported hardware, it will produce all zeroes for latency. Differential Revision: https://reviews.llvm.org/D85254 --- .../tools/llvm-exegesis/X86/lbr/lit.local.cfg | 4 +- llvm/tools/llvm-exegesis/lib/Target.h | 5 ++ llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 17 +++++ .../llvm-exegesis/lib/X86/X86Counter.cpp | 65 +++++++++++++++---- llvm/tools/llvm-exegesis/lib/X86/X86Counter.h | 5 ++ llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 15 +---- 6 files changed, 86 insertions(+), 25 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg index 431967c1ec9b0..69b08f27c39a5 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg @@ -19,9 +19,9 @@ else: try: with open(os.devnull, 'w') as quiet: check_llvm_exegesis_uops_result = subprocess.call( - [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) + [llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) check_llvm_exegesis_latency_result = subprocess.call( - [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) + [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) except OSError: print('could not exec llvm-exegesis') config.unsupported = True diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 70890795426d9..8a5624b42803a 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -142,6 +142,11 @@ class ExegesisTarget { return {&Instr}; } + // Checks hardware and software support for current benchmark mode. + // Returns an error if the target host does not have support to run the + // benchmark. + virtual Error checkFeatureSupport() const { return Error::success(); } + // Creates a snippet generator for the given mode. std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 9f045fa11aa24..270825a8777ba 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -674,6 +674,23 @@ class ExegesisX86Target : public ExegesisTarget { return Arch == Triple::x86_64 || Arch == Triple::x86; } + Error checkFeatureSupport() const override { + // LBR is the only feature we conditionally support now. + // So if LBR is not requested, then we should be able to run the benchmarks. + if (LbrSamplingPeriod == 0) + return Error::success(); + +#if defined(__linux__) && defined(HAVE_LIBPFM) && \ + defined(LIBPFM_HAS_FIELD_CYCLES) + // If the kernel supports it, the hardware still may not have it. + return X86LbrCounter::checkLbrSupport(); +#else + return llvm::make_error( + "LBR not supported on this kernel and/or platform", + llvm::errc::not_supported); +#endif + } + static const unsigned kUnavailableRegisters[4]; }; diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp index 57b493818aaad..25ec4f8586755 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp @@ -21,6 +21,7 @@ #endif // HAVE_LIBPFM #include +#include #include #include #include @@ -35,6 +36,8 @@ namespace llvm { namespace exegesis { +// Number of entries in the LBR. +static constexpr int kLbrEntries = 16; static constexpr size_t kBufferPages = 8; static const size_t kDataBufferSize = kBufferPages * getpagesize(); @@ -70,7 +73,6 @@ static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail, static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize, const void *From, const void *To, llvm::SmallVector *CycleArray) { - assert(From != nullptr && To != nullptr); const char *DataPtr = DataBuf; while (DataPtr < DataBuf + DataSize) { struct perf_event_header Header; @@ -149,21 +151,47 @@ void X86LbrCounter::start() { ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */); } +llvm::Error X86LbrCounter::checkLbrSupport() { + // Do a sample read and check if the results contain non-zero values. + + X86LbrCounter counter(X86LbrPerfEvent(123)); + counter.start(); + + // Prevent the compiler from unrolling the loop and get rid of all the + // branches. We need at least 16 iterations. + int Sum = 0; + int V = 1; + + volatile int *P = &V; + auto TimeLimit = + std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5); + + for (int I = 0; + I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit; + ++I) { + Sum += *P; + } + + counter.stop(); + + auto ResultOrError = counter.doReadCounter(nullptr, nullptr); + if (ResultOrError) + if (!ResultOrError.get().empty()) + // If there is at least one non-zero entry, then LBR is supported. + for (const int64_t &Value : ResultOrError.get()) + if (Value != 0) + return Error::success(); + + return llvm::make_error( + "LBR format with cycles is not suppported on the host.", + llvm::errc::not_supported); +} + llvm::Expected> X86LbrCounter::readOrError(StringRef FunctionBytes) const { - // The max number of time-outs/retries before we give up. - static constexpr int kMaxTimeouts = 160; - // Disable the event before reading ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); - // Parses the LBR buffer and fills CycleArray with the sequence of cycle - // counts from the buffer. - llvm::SmallVector CycleArray; - std::unique_ptr DataBuf(new char[kDataBufferSize]); - int NumTimeouts = 0; - int PollResult = 0; - // Find the boundary of the function so that we could filter the LBRs // to keep only the relevant records. if (FunctionBytes.empty()) @@ -172,6 +200,21 @@ X86LbrCounter::readOrError(StringRef FunctionBytes) const { const void *From = reinterpret_cast(FunctionBytes.data()); const void *To = reinterpret_cast(FunctionBytes.data() + FunctionBytes.size()); + return doReadCounter(From, To); +} + +llvm::Expected> +X86LbrCounter::doReadCounter(const void *From, const void *To) const { + // The max number of time-outs/retries before we give up. + static constexpr int kMaxTimeouts = 160; + + // Parses the LBR buffer and fills CycleArray with the sequence of cycle + // counts from the buffer. + llvm::SmallVector CycleArray; + auto DataBuf = std::make_unique(kDataBufferSize); + int NumTimeouts = 0; + int PollResult = 0; + while (PollResult <= 0) { PollResult = pollLbrPerfEvent(FileDescriptor); if (PollResult > 0) diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h index 94062012917df..73e4dc5b990a0 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h @@ -33,6 +33,8 @@ class X86LbrPerfEvent : public pfm::PerfEvent { class X86LbrCounter : public pfm::Counter { public: + static llvm::Error checkLbrSupport(); + explicit X86LbrCounter(pfm::PerfEvent &&Event); virtual ~X86LbrCounter(); @@ -43,6 +45,9 @@ class X86LbrCounter : public pfm::Counter { readOrError(StringRef FunctionBytes) const override; private: + llvm::Expected> + doReadCounter(const void *From, const void *To) const; + void *MMappedBuffer = nullptr; }; diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index fb3f41e147348..bc2f348a7eaeb 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -160,12 +160,6 @@ static cl::opt cl::desc(""), cl::cat(AnalysisOptions), cl::init("")); -static cl::list - AllowedHostCpus("allowed-host-cpu", - cl::desc("If specified, only run the benchmark if the host " - "CPU matches the names"), - cl::cat(Options), cl::ZeroOrMore); - static cl::opt AnalysisDisplayUnstableOpcodes( "analysis-display-unstable-clusters", cl::desc("if there is more than one benchmark for an opcode, said " @@ -302,12 +296,9 @@ void benchmarkMain() { const LLVMState State(CpuName); - llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU(); - for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end(); - ++Begin) { - if (ActualCpu != *Begin) - ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu)); - } + // Preliminary check to ensure features needed for requested + // benchmark mode are present on target CPU and/or OS. + ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); const std::unique_ptr Runner = ExitOnErr(State.getExegesisTarget().createBenchmarkRunner( From 719ab7309eb7b7b5d802273b0f1871d6cdb965b1 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 24 Sep 2020 17:01:24 -0700 Subject: [PATCH 180/544] scudo: Make it thread-safe to set some runtime configuration flags. Move some of the flags previously in Options, as well as the UseMemoryTagging flag previously in the primary allocator, into an atomic variable so that it can be updated while other threads are running. Relaxed accesses are used because we only have the requirement that the other threads see the new value eventually. The code is set up so that the variable is generally loaded once per allocation function call with the exception of some rarely used code such as error handlers. The flag bits can generally stay in a register during the execution of the allocation function which means that they can be branched on with minimal overhead (e.g. TBZ on aarch64). Differential Revision: https://reviews.llvm.org/D88523 --- .../lib/scudo/standalone/atomic_helpers.h | 14 +++ compiler-rt/lib/scudo/standalone/combined.h | 117 ++++++++++-------- compiler-rt/lib/scudo/standalone/options.h | 72 +++++++++++ compiler-rt/lib/scudo/standalone/primary32.h | 8 +- compiler-rt/lib/scudo/standalone/primary64.h | 16 +-- .../lib/scudo/standalone/wrappers_c.inc | 12 +- 6 files changed, 168 insertions(+), 71 deletions(-) create mode 100644 compiler-rt/lib/scudo/standalone/options.h diff --git a/compiler-rt/lib/scudo/standalone/atomic_helpers.h b/compiler-rt/lib/scudo/standalone/atomic_helpers.h index 0946a3308172c..d88f5d7be642e 100644 --- a/compiler-rt/lib/scudo/standalone/atomic_helpers.h +++ b/compiler-rt/lib/scudo/standalone/atomic_helpers.h @@ -89,6 +89,20 @@ inline typename T::Type atomic_fetch_sub(volatile T *A, typename T::Type V, return __atomic_fetch_sub(&A->ValDoNotUse, V, MO); } +template +inline typename T::Type atomic_fetch_and(volatile T *A, typename T::Type V, + memory_order MO) { + DCHECK(!(reinterpret_cast(A) % sizeof(*A))); + return __atomic_fetch_and(&A->ValDoNotUse, V, MO); +} + +template +inline typename T::Type atomic_fetch_or(volatile T *A, typename T::Type V, + memory_order MO) { + DCHECK(!(reinterpret_cast(A) % sizeof(*A))); + return __atomic_fetch_or(&A->ValDoNotUse, V, MO); +} + template inline typename T::Type atomic_exchange(volatile T *A, typename T::Type V, memory_order MO) { diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index e39871dc47043..2a891e44579ac 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -15,6 +15,7 @@ #include "flags_parser.h" #include "local_cache.h" #include "memtag.h" +#include "options.h" #include "quarantine.h" #include "report.h" #include "secondary.h" @@ -144,16 +145,19 @@ class Allocator { reportUnrecognizedFlags(); // Store some flags locally. - Options.MayReturnNull = getFlags()->may_return_null; - Options.FillContents = - getFlags()->zero_contents - ? ZeroFill - : (getFlags()->pattern_fill_contents ? PatternOrZeroFill : NoFill); - Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch; - Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch; - Options.TrackAllocationStacks = false; - Options.UseOddEvenTags = true; - Options.QuarantineMaxChunkSize = + if (getFlags()->may_return_null) + Primary.Options.set(OptionBit::MayReturnNull); + if (getFlags()->zero_contents) + Primary.Options.setFillContentsMode(ZeroFill); + else if (getFlags()->pattern_fill_contents) + Primary.Options.setFillContentsMode(PatternOrZeroFill); + if (getFlags()->dealloc_type_mismatch) + Primary.Options.set(OptionBit::DeallocTypeMismatch); + if (getFlags()->delete_size_mismatch) + Primary.Options.set(OptionBit::DeleteSizeMismatch); + Primary.Options.set(OptionBit::UseOddEvenTags); + + QuarantineMaxChunkSize = static_cast(getFlags()->quarantine_max_chunk_size); Stats.initLinkerInitialized(); @@ -250,8 +254,8 @@ class Allocator { #endif } - uptr computeOddEvenMaskForPointerMaybe(uptr Ptr, uptr Size) { - if (!Options.UseOddEvenTags) + uptr computeOddEvenMaskForPointerMaybe(Options Options, uptr Ptr, uptr Size) { + if (!Options.get(OptionBit::UseOddEvenTags)) return 0; // If a chunk's tag is odd, we want the tags of the surrounding blocks to be @@ -267,6 +271,7 @@ class Allocator { uptr Alignment = MinAlignment, bool ZeroContents = false) { initThreadMaybe(); + Options Options = Primary.Options.load(); #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.shouldSample())) { @@ -278,10 +283,10 @@ class Allocator { const FillContentsMode FillContents = ZeroContents ? ZeroFill : TSDRegistry.getDisableMemInit() ? NoFill - : Options.FillContents; + : Options.getFillContentsMode(); if (UNLIKELY(Alignment > MaxAlignment)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportAlignmentTooBig(Alignment, MaxAlignment); } @@ -300,7 +305,7 @@ class Allocator { // Takes care of extravagantly large sizes as well as integer overflows. static_assert(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment, ""); if (UNLIKELY(Size >= MaxAllowedMallocSize)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize); } @@ -336,7 +341,7 @@ class Allocator { FillContents); if (UNLIKELY(!Block)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportOutOfMemory(NeededSize); } @@ -359,7 +364,7 @@ class Allocator { // // When memory tagging is enabled, zeroing the contents is done as part of // setting the tag. - if (UNLIKELY(useMemoryTagging())) { + if (UNLIKELY(useMemoryTagging(Options))) { uptr PrevUserPtr; Chunk::UnpackedHeader Header; const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); @@ -424,10 +429,10 @@ class Allocator { } } else { const uptr OddEvenMask = - computeOddEvenMaskForPointerMaybe(BlockUptr, BlockSize); + computeOddEvenMaskForPointerMaybe(Options, BlockUptr, BlockSize); TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd); } - storeAllocationStackMaybe(Ptr); + storeAllocationStackMaybe(Options, Ptr); } else if (UNLIKELY(FillContents != NoFill)) { // This condition is not necessarily unlikely, but since memset is // costly, we might as well mark it as such. @@ -471,6 +476,7 @@ class Allocator { // the TLS destructors, ending up in initialized thread specific data never // being destroyed properly. Any other heap operation will do a full init. initThreadMaybe(/*MinimalInit=*/true); + Options Options = Primary.Options.load(); #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) { @@ -494,7 +500,7 @@ class Allocator { if (UNLIKELY(Header.State != Chunk::State::Allocated)) reportInvalidChunkState(AllocatorAction::Deallocating, Ptr); - if (Options.DeallocTypeMismatch) { + if (Options.get(OptionBit::DeallocTypeMismatch)) { if (Header.OriginOrWasZeroed != Origin) { // With the exception of memalign'd chunks, that can be still be free'd. if (UNLIKELY(Header.OriginOrWasZeroed != Chunk::Origin::Memalign || @@ -505,19 +511,20 @@ class Allocator { } const uptr Size = getSize(Ptr, &Header); - if (DeleteSize && Options.DeleteSizeMismatch) { + if (DeleteSize && Options.get(OptionBit::DeleteSizeMismatch)) { if (UNLIKELY(DeleteSize != Size)) reportDeleteSizeMismatch(Ptr, DeleteSize, Size); } - quarantineOrDeallocateChunk(Ptr, &Header, Size); + quarantineOrDeallocateChunk(Options, Ptr, &Header, Size); } void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) { initThreadMaybe(); + Options Options = Primary.Options.load(); if (UNLIKELY(NewSize >= MaxAllowedMallocSize)) { - if (Options.MayReturnNull) + if (Options.get(OptionBit::MayReturnNull)) return nullptr; reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize); } @@ -552,7 +559,7 @@ class Allocator { // Pointer has to be allocated with a malloc-type function. Some // applications think that it is OK to realloc a memalign'ed pointer, which // will trigger this check. It really isn't. - if (Options.DeallocTypeMismatch) { + if (Options.get(OptionBit::DeallocTypeMismatch)) { if (UNLIKELY(OldHeader.OriginOrWasZeroed != Chunk::Origin::Malloc)) reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr, OldHeader.OriginOrWasZeroed, @@ -583,11 +590,11 @@ class Allocator { : BlockEnd - (reinterpret_cast(OldPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(ClassId && useMemoryTagging())) { + if (UNLIKELY(ClassId && useMemoryTagging(Options))) { resizeTaggedChunk(reinterpret_cast(OldTaggedPtr) + OldSize, reinterpret_cast(OldTaggedPtr) + NewSize, BlockEnd); - storeAllocationStackMaybe(OldPtr); + storeAllocationStackMaybe(Options, OldPtr); } return OldTaggedPtr; } @@ -601,7 +608,7 @@ class Allocator { if (NewPtr) { const uptr OldSize = getSize(OldPtr, &OldHeader); memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); - quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize); + quarantineOrDeallocateChunk(Options, OldPtr, &OldHeader, OldSize); } return NewPtr; } @@ -682,7 +689,7 @@ class Allocator { if (getChunkFromBlock(Block, &Chunk, &Header) && Header.State == Chunk::State::Allocated) { uptr TaggedChunk = Chunk; - if (useMemoryTagging()) + if (useMemoryTagging(Primary.Options.load())) TaggedChunk = loadTag(Chunk); Callback(TaggedChunk, getSize(reinterpret_cast(Chunk), &Header), Arg); @@ -697,7 +704,7 @@ class Allocator { bool canReturnNull() { initThreadMaybe(); - return Options.MayReturnNull; + return Primary.Options.load().get(OptionBit::MayReturnNull); } bool setOption(Option O, sptr Value) { @@ -711,9 +718,9 @@ class Allocator { // any particular chunk is cut in half. Therefore we use this tuning // setting to control whether odd/even tags are enabled. if (Value == M_MEMTAG_TUNING_BUFFER_OVERFLOW) - Options.UseOddEvenTags = true; + Primary.Options.set(OptionBit::UseOddEvenTags); else if (Value == M_MEMTAG_TUNING_UAF) - Options.UseOddEvenTags = false; + Primary.Options.clear(OptionBit::UseOddEvenTags); return true; } else { // We leave it to the various sub-components to decide whether or not they @@ -773,18 +780,26 @@ class Allocator { Header.State == Chunk::State::Allocated; } - bool useMemoryTagging() { return Primary.useMemoryTagging(); } + bool useMemoryTagging() const { + return useMemoryTagging(Primary.Options.load()); + } + static bool useMemoryTagging(Options Options) { + return PrimaryT::useMemoryTagging(Options); + } void disableMemoryTagging() { Primary.disableMemoryTagging(); } void setTrackAllocationStacks(bool Track) { initThreadMaybe(); - Options.TrackAllocationStacks = Track; + if (Track) + Primary.Options.set(OptionBit::TrackAllocationStacks); + else + Primary.Options.clear(OptionBit::TrackAllocationStacks); } void setFillContents(FillContentsMode FillContents) { initThreadMaybe(); - Options.FillContents = FillContents; + Primary.Options.setFillContentsMode(FillContents); } const char *getStackDepotAddress() const { @@ -951,16 +966,7 @@ class Allocator { static const uptr MaxTraceSize = 64; u32 Cookie; - - struct { - u8 MayReturnNull : 1; // may_return_null - FillContentsMode FillContents : 2; // zero_contents, pattern_fill_contents - u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch - u8 DeleteSizeMismatch : 1; // delete_size_mismatch - u8 TrackAllocationStacks : 1; - u8 UseOddEvenTags : 1; - u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size - } Options; + u32 QuarantineMaxChunkSize; GlobalStats Stats; PrimaryT Primary; @@ -1025,15 +1031,15 @@ class Allocator { reinterpret_cast(Ptr) - SizeOrUnusedBytes; } - void quarantineOrDeallocateChunk(void *Ptr, Chunk::UnpackedHeader *Header, - uptr Size) { + void quarantineOrDeallocateChunk(Options Options, void *Ptr, + Chunk::UnpackedHeader *Header, uptr Size) { Chunk::UnpackedHeader NewHeader = *Header; - if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) { + if (UNLIKELY(NewHeader.ClassId && useMemoryTagging(Options))) { u8 PrevTag = extractTag(loadTag(reinterpret_cast(Ptr))); if (!TSDRegistry.getDisableMemInit()) { uptr TaggedBegin, TaggedEnd; const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( - reinterpret_cast(getBlockBegin(Ptr, &NewHeader)), + Options, reinterpret_cast(getBlockBegin(Ptr, &NewHeader)), SizeClassMap::getSizeByClassId(NewHeader.ClassId)); // Exclude the previous tag so that immediate use after free is detected // 100% of the time. @@ -1041,14 +1047,14 @@ class Allocator { &TaggedEnd); } NewHeader.OriginOrWasZeroed = !TSDRegistry.getDisableMemInit(); - storeDeallocationStackMaybe(Ptr, PrevTag); + storeDeallocationStackMaybe(Options, Ptr, PrevTag); } // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. // Logical Or can be short-circuited, which introduces unnecessary // conditional jumps, so use bitwise Or and let the compiler be clever. - const bool BypassQuarantine = !Quarantine.getCacheSize() | !Size | - (Size > Options.QuarantineMaxChunkSize); + const bool BypassQuarantine = + !Quarantine.getCacheSize() | !Size | (Size > QuarantineMaxChunkSize); if (BypassQuarantine) { NewHeader.State = Chunk::State::Available; Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); @@ -1089,16 +1095,17 @@ class Allocator { return Offset + Chunk::getHeaderSize(); } - void storeAllocationStackMaybe(void *Ptr) { - if (!UNLIKELY(Options.TrackAllocationStacks)) + void storeAllocationStackMaybe(Options Options, void *Ptr) { + if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; auto *Ptr32 = reinterpret_cast(Ptr); Ptr32[MemTagAllocationTraceIndex] = collectStackTrace(); Ptr32[MemTagAllocationTidIndex] = getThreadID(); } - void storeDeallocationStackMaybe(void *Ptr, uint8_t PrevTag) { - if (!UNLIKELY(Options.TrackAllocationStacks)) + void storeDeallocationStackMaybe(Options Options, void *Ptr, + uint8_t PrevTag) { + if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; // Disable tag checks here so that we don't need to worry about zero sized diff --git a/compiler-rt/lib/scudo/standalone/options.h b/compiler-rt/lib/scudo/standalone/options.h new file mode 100644 index 0000000000000..4f387a37f4826 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/options.h @@ -0,0 +1,72 @@ +//===-- options.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_OPTIONS_H_ +#define SCUDO_OPTIONS_H_ + +#include "atomic_helpers.h" +#include "common.h" + +namespace scudo { + +enum class OptionBit { + MayReturnNull, + FillContents0of2, + FillContents1of2, + DeallocTypeMismatch, + DeleteSizeMismatch, + TrackAllocationStacks, + UseOddEvenTags, + UseMemoryTagging, +}; + +struct Options { + u32 Val; + + bool get(OptionBit Opt) const { return Val & (1U << static_cast(Opt)); } + + FillContentsMode getFillContentsMode() const { + return static_cast( + (Val >> static_cast(OptionBit::FillContents0of2)) & 3); + } +}; + +struct AtomicOptions { + atomic_u32 Val; + +public: + Options load() const { + return Options{atomic_load(&Val, memory_order_relaxed)}; + } + + void clear(OptionBit Opt) { + atomic_fetch_and(&Val, ~(1U << static_cast(Opt)), + memory_order_relaxed); + } + + void set(OptionBit Opt) { + atomic_fetch_or(&Val, 1U << static_cast(Opt), memory_order_relaxed); + } + + void setFillContentsMode(FillContentsMode FillContents) { + while (1) { + u32 Opts = atomic_load(&Val, memory_order_relaxed); + u32 NewOpts = Opts; + NewOpts &= ~(3U << static_cast(OptionBit::FillContents0of2)); + NewOpts |= static_cast(FillContents) + << static_cast(OptionBit::FillContents0of2); + if (atomic_compare_exchange_strong(&Val, &Opts, NewOpts, + memory_order_relaxed)) + break; + } + } +}; + +} // namespace scudo + +#endif // SCUDO_OPTIONS_H_ diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index 0a985fb67beae..a159a584c7cb4 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -13,6 +13,7 @@ #include "common.h" #include "list.h" #include "local_cache.h" +#include "options.h" #include "release.h" #include "report.h" #include "stats.h" @@ -206,7 +207,10 @@ class SizeClassAllocator32 { return TotalReleasedBytes; } - bool useMemoryTagging() { return false; } + static bool useMemoryTagging(Options Options) { + (void)Options; + return false; + } void disableMemoryTagging() {} const char *getRegionInfoArrayAddress() const { return nullptr; } @@ -218,6 +222,8 @@ class SizeClassAllocator32 { return {}; } + AtomicOptions Options; + private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr RegionSize = 1UL << RegionSizeLog; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 933b1ee7c9670..1f7ac38cefeda 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -14,6 +14,7 @@ #include "list.h" #include "local_cache.h" #include "memtag.h" +#include "options.h" #include "release.h" #include "stats.h" #include "string_utils.h" @@ -93,8 +94,8 @@ class SizeClassAllocator64 { } setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); - if (SupportsMemoryTagging) - UseMemoryTagging = systemSupportsMemoryTagging(); + if (SupportsMemoryTagging && systemSupportsMemoryTagging()) + Options.set(OptionBit::UseMemoryTagging); } void init(s32 ReleaseToOsInterval) { memset(this, 0, sizeof(*this)); @@ -207,10 +208,10 @@ class SizeClassAllocator64 { return TotalReleasedBytes; } - bool useMemoryTagging() const { - return SupportsMemoryTagging && UseMemoryTagging; + static bool useMemoryTagging(Options Options) { + return SupportsMemoryTagging && Options.get(OptionBit::UseMemoryTagging); } - void disableMemoryTagging() { UseMemoryTagging = false; } + void disableMemoryTagging() { Options.clear(OptionBit::UseMemoryTagging); } const char *getRegionInfoArrayAddress() const { return reinterpret_cast(RegionInfoArray); @@ -262,6 +263,8 @@ class SizeClassAllocator64 { return B; } + AtomicOptions Options; + private: static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; @@ -306,7 +309,6 @@ class SizeClassAllocator64 { uptr PrimaryBase; MapPlatformData Data; atomic_s32 ReleaseToOsIntervalMs; - bool UseMemoryTagging; alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; RegionInfo *getRegionInfo(uptr ClassId) { @@ -373,7 +375,7 @@ class SizeClassAllocator64 { if (UNLIKELY(!map(reinterpret_cast(RegionBeg + MappedUser), UserMapSize, "scudo:primary", MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging() ? MAP_MEMTAG : 0), + (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0), &Region->Data))) return nullptr; Region->MappedUser += UserMapSize; diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc index 7386a0053a0f9..9d640038d8e29 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -234,30 +234,26 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) { // Disable memory tagging for the heap. The caller must disable memory tag // checks globally (e.g. by clearing TCF0 on aarch64) before calling this -// function, and may not re-enable them after calling the function. The program -// must be single threaded at the point when the function is called. +// function, and may not re-enable them after calling the function. INTERFACE WEAK void SCUDO_PREFIX(malloc_disable_memory_tagging)() { SCUDO_ALLOCATOR.disableMemoryTagging(); } // Sets whether scudo records stack traces and other metadata for allocations // and deallocations. This function only has an effect if the allocator and -// hardware support memory tagging. The program must be single threaded at the -// point when the function is called. +// hardware support memory tagging. INTERFACE WEAK void SCUDO_PREFIX(malloc_set_track_allocation_stacks)(int track) { SCUDO_ALLOCATOR.setTrackAllocationStacks(track); } -// Sets whether scudo zero-initializes all allocated memory. The program must -// be single threaded at the point when the function is called. +// Sets whether scudo zero-initializes all allocated memory. INTERFACE WEAK void SCUDO_PREFIX(malloc_set_zero_contents)(int zero_contents) { SCUDO_ALLOCATOR.setFillContents(zero_contents ? scudo::ZeroFill : scudo::NoFill); } -// Sets whether scudo pattern-initializes all allocated memory. The program must -// be single threaded at the point when the function is called. +// Sets whether scudo pattern-initializes all allocated memory. INTERFACE WEAK void SCUDO_PREFIX(malloc_set_pattern_fill_contents)(int pattern_fill_contents) { SCUDO_ALLOCATOR.setFillContents( From 2ab87702231e193ca170aa8ad4caa9f98bc7ced1 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 30 Sep 2020 09:42:49 -0700 Subject: [PATCH 181/544] [test][SampleProfile][NewPM] Fix some tests under NPM --- llvm/test/Transforms/SampleProfile/branch.ll | 6 +++--- llvm/test/Transforms/SampleProfile/calls.ll | 4 ++-- llvm/test/Transforms/SampleProfile/discriminator.ll | 6 +++--- llvm/test/Transforms/SampleProfile/fnptr.ll | 8 ++++---- llvm/test/Transforms/SampleProfile/offset.ll | 4 ++-- llvm/test/Transforms/SampleProfile/propagate.ll | 4 ++-- llvm/test/Transforms/SampleProfile/remap.ll | 6 +++--- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/test/Transforms/SampleProfile/branch.ll b/llvm/test/Transforms/SampleProfile/branch.ll index 6e7a9eb7208cc..32c857c56d60c 100644 --- a/llvm/test/Transforms/SampleProfile/branch.ll +++ b/llvm/test/Transforms/SampleProfile/branch.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; Original C++ code for this test case: ; @@ -30,7 +30,7 @@ ; Function Attrs: uwtable define i32 @main(i32 %argc, i8** %argv) #0 !dbg !6 { -; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main': +; CHECK: Printing analysis {{.*}} for function 'main': entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/SampleProfile/calls.ll b/llvm/test/Transforms/SampleProfile/calls.ll index f92bf547182a3..ecd188fcaf0e2 100644 --- a/llvm/test/Transforms/SampleProfile/calls.ll +++ b/llvm/test/Transforms/SampleProfile/calls.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/calls.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; Original C++ test case ; diff --git a/llvm/test/Transforms/SampleProfile/discriminator.ll b/llvm/test/Transforms/SampleProfile/discriminator.ll index 4908c313dc5c5..7e2d23ef018cf 100644 --- a/llvm/test/Transforms/SampleProfile/discriminator.ll +++ b/llvm/test/Transforms/SampleProfile/discriminator.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; Original code ; @@ -23,7 +23,7 @@ ; but the then branch (line 3.1) is only executed 5 times. define i32 @foo(i32 %i) #0 !dbg !4 { -; CHECK: Printing analysis 'Branch Probability Analysis' for function 'foo': +; CHECK: Printing analysis {{.*}} for function 'foo': entry: %i.addr = alloca i32, align 4 %x = alloca i32, align 4 diff --git a/llvm/test/Transforms/SampleProfile/fnptr.ll b/llvm/test/Transforms/SampleProfile/fnptr.ll index c22fc8403d875..94925b4a91706 100644 --- a/llvm/test/Transforms/SampleProfile/fnptr.ll +++ b/llvm/test/Transforms/SampleProfile/fnptr.ll @@ -2,11 +2,11 @@ ; formats. This checks that we produce the same profile annotations regardless ; of the profile format. ; -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob -enable-new-pm=0| FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; CHECK: edge for.body3 -> if.then probability is 0x1a56a56a / 0x80000000 = 20.58% ; CHECK: edge for.body3 -> if.else probability is 0x65a95a96 / 0x80000000 = 79.42% diff --git a/llvm/test/Transforms/SampleProfile/offset.ll b/llvm/test/Transforms/SampleProfile/offset.ll index 8451efe62b8ed..b537b24360977 100644 --- a/llvm/test/Transforms/SampleProfile/offset.ll +++ b/llvm/test/Transforms/SampleProfile/offset.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; Original C++ code for this test case: ; diff --git a/llvm/test/Transforms/SampleProfile/propagate.ll b/llvm/test/Transforms/SampleProfile/propagate.ll index b8db9f68c82ad..74a6a700a8e5e 100644 --- a/llvm/test/Transforms/SampleProfile/propagate.ll +++ b/llvm/test/Transforms/SampleProfile/propagate.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; Original C++ code for this test case: ; diff --git a/llvm/test/Transforms/SampleProfile/remap.ll b/llvm/test/Transforms/SampleProfile/remap.ll index b7b3fb16bb947..090f4b3fde3a6 100644 --- a/llvm/test/Transforms/SampleProfile/remap.ll +++ b/llvm/test/Transforms/SampleProfile/remap.ll @@ -1,15 +1,15 @@ -; RUN: opt %s -passes=sample-profile -sample-profile-file=%S/Inputs/remap.prof -sample-profile-remapping-file=%S/Inputs/remap.map | opt -analyze -branch-prob | FileCheck %s +; RUN: opt %s -passes=sample-profile -sample-profile-file=%S/Inputs/remap.prof -sample-profile-remapping-file=%S/Inputs/remap.map | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; ; Check whether profile remapping work with loading profile on demand used by extbinary format profile. ; RUN: llvm-profdata merge -sample -extbinary %S/Inputs/remap.prof -o %t.extbinary.afdo -; RUN: opt %s -passes=sample-profile -sample-profile-file=%t.extbinary.afdo -sample-profile-remapping-file=%S/Inputs/remap.map | opt -analyze -branch-prob | FileCheck %s +; RUN: opt %s -passes=sample-profile -sample-profile-file=%t.extbinary.afdo -sample-profile-remapping-file=%S/Inputs/remap.map | opt -passes='print' -disable-output 2>&1 | FileCheck %s ; ; Reduced from branch.ll declare i1 @foo() define void @_ZN3foo3barERKN1M1XINS_6detail3quxEEE() #0 !dbg !2 { -; CHECK: Printing analysis 'Branch Probability Analysis' for function '_ZN3foo3barERKN1M1XINS_6detail3quxEEE': +; CHECK: Printing analysis {{.*}} for function '_ZN3foo3barERKN1M1XINS_6detail3quxEEE': entry: %cmp = call i1 @foo(), !dbg !6 From 73fb9698c0573778787e77a8ffa57e7fa3caebd4 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 30 Sep 2020 18:56:52 +0200 Subject: [PATCH 182/544] [asan][test] Several Posix/unpoison-alternate-stack.cpp fixes `Posix/unpoison-alternate-stack.cpp` currently `FAIL`s on Solaris/i386. Some of the problems are generic: - `clang` warns compiling the testcase: compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp:83:7: warning: nested designators are a C99 extension [-Wc99-designator] .sa_sigaction = signalHandler, ^~~~~~~~~~~~~ compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp:84:7: warning: ISO C++ requires field designators to be specified in declaration order; field '_funcptr' will be initialized after field 'sa_flags' [-Wreorder-init-list] .sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and some more instances. This can all easily be avoided by initializing each field separately. - The test `SEGV`s in `__asan_memcpy`. The default Solaris/i386 stack size is only 4 kB, while `__asan_memcpy` tries to allocate either 5436 (32-bit) or 10688 bytes (64-bit) on the stack. This patch avoids this by requiring at least 16 kB stack size. - Even without `-fsanitize=address` I get an assertion failure: Assertion failed: !isOnSignalStack(), file compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp, line 117 The fundamental problem with this testcase is that `longjmp` from a signal handler is highly unportable; XPG7 strongly warns against it and it is thus unspecified which stack is used when `longjmp`ing from a signal handler running on an alternative stack. So I'm `XFAIL`ing this testcase on Solaris. Tested on `amd64-pc-solaris2.11` and `x86_64-pc-linux-gnu`. Differential Revision: https://reviews.llvm.org/D88501 --- .../Posix/unpoison-alternate-stack.cpp | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp index d684810201c49..4774993cdf328 100644 --- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp @@ -7,7 +7,10 @@ // RUN: %run %t // XFAIL: ios && !iossim +// longjmp from signal handler is unportable. +// XFAIL: solaris +#include #include #include #include @@ -83,10 +86,9 @@ void signalHandler(int, siginfo_t *, void *) { void setSignalAlternateStack(void *AltStack) { sigaltstack((stack_t const *)AltStack, nullptr); - struct sigaction Action = { - .sa_sigaction = signalHandler, - .sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK, - }; + struct sigaction Action = {}; + Action.sa_sigaction = signalHandler; + Action.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK; sigemptyset(&Action.sa_mask); sigaction(SIGUSR1, &Action, nullptr); @@ -137,9 +139,11 @@ void *threadFun(void *AltStack) { // reports when the stack is reused. int main() { size_t const PageSize = sysconf(_SC_PAGESIZE); + // The Solaris defaults of 4k (32-bit) and 8k (64-bit) are too small. + size_t const MinStackSize = std::max(PTHREAD_STACK_MIN, 16 * 1024); // To align the alternate stack, we round this up to page_size. size_t const DefaultStackSize = - (PTHREAD_STACK_MIN - 1 + PageSize) & ~(PageSize - 1); + (MinStackSize - 1 + PageSize) & ~(PageSize - 1); // The alternate stack needs a certain size, or the signal handler segfaults. size_t const AltStackSize = 10 * PageSize; size_t const MappingSize = DefaultStackSize + AltStackSize; @@ -149,11 +153,10 @@ int main() { MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - stack_t const AltStack = { - .ss_sp = (char *)Mapping + DefaultStackSize, - .ss_flags = 0, - .ss_size = AltStackSize, - }; + stack_t AltStack = {}; + AltStack.ss_sp = (char *)Mapping + DefaultStackSize; + AltStack.ss_flags = 0; + AltStack.ss_size = AltStackSize; pthread_t Thread; pthread_attr_t ThreadAttr; From 8d8cb1ad80b7074ac60d070fae89261894d34a0d Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Wed, 30 Sep 2020 13:03:14 -0400 Subject: [PATCH 183/544] [AArch64] Avoid pairing loads when the base reg is modified When pairing loads, we should check if in between the two loads the base register has been modified. If that is the case then avoid pairing them because the second load actually loads from a different address. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D86956 --- .../AArch64/AArch64LoadStoreOptimizer.cpp | 9 ++ .../AArch64/aarch64-ldst-modified-baseReg.mir | 105 ++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index ea2e848d18ce5..e07e724b7b0c4 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1564,6 +1564,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, continue; } + // If the BaseReg has been modified, then we cannot do the optimization. + // For example, in the following pattern + // ldr x1 [x2] + // ldr x2 [x3] + // ldr x4 [x2, #8], + // the first and third ldr cannot be converted to ldp x1, x4, [x2] + if (!ModifiedRegUnits.available(BaseReg)) + return E; + // If the Rt of the second instruction was not modified or used between // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the diff --git a/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir b/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir new file mode 100644 index 0000000000000..54e5f394bf47e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir @@ -0,0 +1,105 @@ +# RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -run-pass=aarch64-ldst-opt %s -o - | FileCheck %s +# +# When the AArch64 Load Store Optimization pass tries to convert load instructions +# into a ldp instruction, and when the base register of the second ldr instruction +# has been modified in between these two ldr instructions, the conversion should not +# occur. +# +# For example, for the following pattern: +# ldr x9 [x10] +# ldr x10 [x8] +# ldr x10 [x10, 8], +# the first and third ldr instructions cannot be converted to ldp x9, x10, [x10]. +# +# CHECK-LABEL: name: ldr-modified-baseReg-no-ldp1 +# CHECK-NOT: LDP +# CHECK: $x9 = LDRXui $x10, 1 :: (load 8) +# CHECK: $x10 = LDURXi $x8, 1 :: (load 8) +# CHECK: $x10 = LDRXui $x10, 0 :: (load 8) +# CHECK: RET +--- +name: ldr-modified-baseReg-no-ldp1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x8, $x10 + + $x9 = LDRXui $x10, 1 :: (load 8) + $x10 = LDURXi $x8, 1 :: (load 8) + $x10 = LDRXui $x10, 0 :: (load 8) + RET undef $lr, implicit undef $w0 +... + +# CHECK-LABEL: name: str-modified-baseReg-no-stp1 +# CHECK-NOT: STP +# CHECK: STRXui $x9, $x10, 1 :: (store 8) +# CHECK: $x10 = LDRXui $x8, 0 :: (load 8) +# CHECK: STRXui $x10, $x10, 0 :: (store 8) +# CHECK: RET +--- +name: str-modified-baseReg-no-stp1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x9, $x8, $x10 + + STRXui $x9, $x10, 1 :: (store 8) + $x10 = LDRXui $x8, 0 :: (load 8) + STRXui $x10, $x10, 0 :: (store 8) + RET undef $lr, implicit undef $w0 +... + +# CHECK-LABEL: name: ldr-modified-baseReg-no-ldp2 +# CHECK-NOT: LDP +# CHECK: $x9 = LDRXui $x10, 1 :: (load 8) +# CHECK: $x10 = MOVi64imm 13 +# CHECK: $x11 = LDRXui $x10, 0 :: (load 8) +# CHECK: RET +--- +name: ldr-modified-baseReg-no-ldp2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x8, $x10 + + $x9 = LDRXui $x10, 1 :: (load 8) + $x10 = MOVi64imm 13 + $x11 = LDRXui $x10, 0 :: (load 8) + RET undef $lr, implicit undef $w0 +... + +# CHECK-LABEL: name: ldr-modified-baseReg-no-ldp3 +# CHECK-NOT: LDP +# CHECK: $x9 = LDRXui $x10, 1 :: (load 8) +# CHECK: $x10 = ADDXri $x8, $x11, 0 +# CHECK: $x12 = LDRXui $x10, 0 :: (load 8) +# CHECK: RET +--- +name: ldr-modified-baseReg-no-ldp3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x8, $x10, $x11 + + $x9 = LDRXui $x10, 1 :: (load 8) + $x10 = ADDXri $x8, $x11, 0 + $x12 = LDRXui $x10, 0 :: (load 8) + RET undef $lr, implicit undef $w0 +... + +# CHECK-LABEL: name: ldr-modified-baseAddr-convert-to-ldp +# CHECK: $x12, $x9 = LDPXi $x10, 0 :: (load 8) +# CHECK: STRXui $x11, $x10, 1 :: (store 8) +# CHECK: RET +--- +name: ldr-modified-baseAddr-convert-to-ldp +tracksRegLiveness: true +body: | + bb.0: + liveins: $x8, $x10, $x11 + + $x9 = LDRXui $x10, 1 :: (load 8) + STRXui $x11, $x10, 1 :: (store 8) + $x12 = LDRXui $x10, 0 :: (load 8) + RET undef $lr, implicit undef $w0 +... From 187686bea3878c0bf2b150d784e7eab223434e25 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 30 Sep 2020 13:18:42 -0400 Subject: [PATCH 184/544] [CodeGen] add test for NAN creation; NFC This goes with the APFloat change proposed in D88238. This is copied from the MIPS-specific test in builtin-nan-legacy.c to verify that the normal behavior is correct on other targets without the complication of an inverted quiet bit. --- clang/test/CodeGen/builtin-nan-exception.c | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 clang/test/CodeGen/builtin-nan-exception.c diff --git a/clang/test/CodeGen/builtin-nan-exception.c b/clang/test/CodeGen/builtin-nan-exception.c new file mode 100644 index 0000000000000..2acf0c4390ec8 --- /dev/null +++ b/clang/test/CodeGen/builtin-nan-exception.c @@ -0,0 +1,23 @@ +// RUN: %clang -target aarch64 -emit-llvm -S %s -o - | FileCheck %s +// RUN: %clang -target lanai -emit-llvm -S %s -o - | FileCheck %s +// RUN: %clang -target riscv64 -emit-llvm -S %s -o - | FileCheck %s +// RUN: %clang -target x86_64 -emit-llvm -S %s -o - | FileCheck %s + +// Run a variety of targets to ensure there's no target-based difference. + +// The builtin always produces a 64-bit (double). +// An SNaN with no payload is formed by setting the bit after the +// the quiet bit (MSB of the significand). + +// CHECK: float 0x7FF8000000000000, float 0x7FF4000000000000 +// CHECK: double 0x7FF8000000000000, double 0x7FF4000000000000 + +float f[] = { + __builtin_nan(""), + __builtin_nans(""), +}; + +double d[] = { + __builtin_nan(""), + __builtin_nans(""), +}; From 700e63293eea4a23440f300b1e9125ca2e80c6e9 Mon Sep 17 00:00:00 2001 From: Alexandre Rames Date: Wed, 30 Sep 2020 18:11:14 +0100 Subject: [PATCH 185/544] [Sema] Support Comma operator for fp16 vectors. The current half vector was enforcing an assert expecting "(LHS is half vector) == (RHS is half vector)" for comma. Reviewed By: ahatanak, fhahn Differential Revision: https://reviews.llvm.org/D88265 --- clang/lib/Sema/SemaExpr.cpp | 7 ++++--- clang/test/Sema/fp16vec-sema.c | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index a7c076657fb52..22840dd3dfe3c 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13940,9 +13940,10 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, // float vectors and truncating the result back to half vector. For now, we do // this only when HalfArgsAndReturn is set (that is, when the target is arm or // arm64). - assert(isVector(RHS.get()->getType(), Context.HalfTy) == - isVector(LHS.get()->getType(), Context.HalfTy) && - "both sides are half vectors or neither sides are"); + assert( + (Opc == BO_Comma || isVector(RHS.get()->getType(), Context.HalfTy) == + isVector(LHS.get()->getType(), Context.HalfTy)) && + "both sides are half vectors or neither sides are"); ConvertHalfVec = needsConversionOfHalfVec(ConvertHalfVec, Context, LHS.get(), RHS.get()); diff --git a/clang/test/Sema/fp16vec-sema.c b/clang/test/Sema/fp16vec-sema.c index aefb5f86a14bf..f61ad4c91e89d 100644 --- a/clang/test/Sema/fp16vec-sema.c +++ b/clang/test/Sema/fp16vec-sema.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -Wno-unused-value -verify %s typedef __fp16 half4 __attribute__ ((vector_size (8))); typedef float float4 __attribute__ ((vector_size (16))); @@ -28,6 +28,8 @@ void testFP16Vec(int c) { sv0 = hv0 >= hv1; sv0 = hv0 || hv1; // expected-error{{logical expression with vector types 'half4' (vector of 4 '__fp16' values) and 'half4' is only supported in C++}} sv0 = hv0 && hv1; // expected-error{{logical expression with vector types 'half4' (vector of 4 '__fp16' values) and 'half4' is only supported in C++}} + hv0, 1; + 1, hv0; // Implicit conversion between half vectors and float vectors are not allowed. hv0 = fv0; // expected-error{{assigning to}} From 892df30a7f344b6cb9995710efbc94bb25cfb95b Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 30 Sep 2020 10:48:00 -0700 Subject: [PATCH 186/544] Fix interaction of `constinit` and `weak`. We previously took a shortcut and said that weak variables never have constant initializers (because those initializers are never correct to use outside the variable). We now say that weak variables can have constant initializers, but are never usable in constant expressions. --- clang/lib/AST/Decl.cpp | 8 ++++---- clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/Sema/SemaDeclCXX.cpp | 2 +- clang/test/SemaCXX/cxx20-constinit.cpp | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 clang/test/SemaCXX/cxx20-constinit.cpp diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index c96450b8a3776..a6c7f30528eb4 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2287,6 +2287,10 @@ bool VarDecl::mightBeUsableInConstantExpressions(ASTContext &C) const { if (isa(this)) return false; + // The values of weak variables are never usable in constant expressions. + if (isWeak()) + return false; + // In C++11, any variable of reference type can be used in a constant // expression if it is initialized by a constant expression. if (Lang.CPlusPlus11 && getType()->isReferenceType()) @@ -2414,10 +2418,6 @@ bool VarDecl::isInitICE() const { } bool VarDecl::checkInitIsICE() const { - // Initializers of weak variables are never ICEs. - if (isWeak()) - return false; - EvaluatedStmt *Eval = ensureEvaluatedStmt(); if (Eval->CheckedICE) // We have already checked whether this subexpression is an diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3bc649b96990d..b17eed2dc823d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14816,7 +14816,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { const VarDecl *VD; // Look for a declaration of this variable that has an initializer, and // check whether it is an ICE. - if (Dcl->getAnyInitializer(VD) && VD->checkInitIsICE()) + if (Dcl->getAnyInitializer(VD) && !VD->isWeak() && VD->checkInitIsICE()) return NoDiag(); else return ICEDiag(IK_NotICE, cast(E)->getLocation()); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 2d2b80573a696..1275fc0c95b56 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -11112,7 +11112,7 @@ QualType Sema::CheckComparisonCategoryType(ComparisonCategoryType Kind, // might be foobar, including it failing to be a constant expression. // TODO Handle more ways the lookup or result can be invalid. if (!VD->isStaticDataMember() || !VD->isConstexpr() || !VD->hasInit() || - !VD->checkInitIsICE()) + VD->isWeak() || !VD->checkInitIsICE()) return UnsupportedSTLError(USS_InvalidMember, MemName, VD); // Attempt to evaluate the var decl as a constant expression and extract diff --git a/clang/test/SemaCXX/cxx20-constinit.cpp b/clang/test/SemaCXX/cxx20-constinit.cpp new file mode 100644 index 0000000000000..a572b91289072 --- /dev/null +++ b/clang/test/SemaCXX/cxx20-constinit.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 %s -std=c++20 -verify +// expected-no-diagnostics + +constinit int a __attribute__((weak)) = 0; From 9d2378b59150f6f1cb5c9cf42ea06b0bb57029a1 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 30 Sep 2020 13:10:32 -0400 Subject: [PATCH 187/544] [OpenMP] Add Error Handling for Conflicting Pointer Sizes for Target Offload Summary: This patch adds an error to Clang that detects if OpenMP offloading is used between two architectures with incompatible pointer sizes. This ensures that the data mapping can be done correctly and solves an issue in code generation generating the wrong size pointer. Reviewer: jdoerfert Subscribers: Tags: #OpenMP #Clang Differential Revision: --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 1 + clang/lib/Frontend/CompilerInvocation.cpp | 8 ++++++++ ...get_parallel_reduction_codegen_tbaa_PR46146.cpp | 4 ++-- .../target_incompatible_architecture_messages.cpp | 14 ++++++++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 clang/test/OpenMP/target_incompatible_architecture_messages.cpp diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3bf1bb19b7ae3..29bc19e5a84e5 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -253,6 +253,7 @@ def err_drv_optimization_remark_format : Error< "unknown remark serializer format: '%0'">; def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">; def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">; +def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">; def err_drv_omp_host_ir_file_not_found : Error< "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">; def err_drv_omp_host_target_not_supported : Error< diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index b402f53cc765b..bbdf0e3be7ae0 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3206,6 +3206,14 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, TT.getArch() == llvm::Triple::x86 || TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); + else if ((T.isArch64Bit() && TT.isArch32Bit()) || + (T.isArch64Bit() && TT.isArch16Bit()) || + (T.isArch32Bit() && TT.isArch64Bit()) || + (T.isArch32Bit() && TT.isArch16Bit()) || + (T.isArch16Bit() && TT.isArch32Bit()) || + (T.isArch16Bit() && TT.isArch64Bit())) + Diags.Report(diag::err_drv_incompatible_omp_arch) + << A->getValue(i) << T.str(); else Opts.OMPTargetTriples.push_back(TT); } diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index aefe00f1cadf9..031c7b6c778e4 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/clang/test/OpenMP/target_incompatible_architecture_messages.cpp b/clang/test/OpenMP/target_incompatible_architecture_messages.cpp new file mode 100644 index 0000000000000..f0f9d236d764d --- /dev/null +++ b/clang/test/OpenMP/target_incompatible_architecture_messages.cpp @@ -0,0 +1,14 @@ +// RUN: not %clang_cc1 -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -x c++ -fopenmp -triple i386-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -o - %s 2>&1 | FileCheck %s +// CHECK: error: OpenMP target architecture '{{.+}}' pointer size is incompatible with host '{{.+}}' +#ifndef HEADER +#define HEADER + +void test() { +#pragma omp target + {} +} + +#endif From 90eaedda9b8ef46e2c0c1b8bce33e98a3adbb68c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Sep 2020 10:23:14 -0400 Subject: [PATCH 188/544] [OpenMP] Replace OpenMP RTL Functions With OMPIRBuilder and OMPKinds.def Summary: Replace the OpenMP Runtime Library functions used in CGOpenMPRuntimeGPU for OpenMP device code generation with ones in OMPKinds.def and use OMPIRBuilder for generating runtime calls. This allows us to consolidate more OpenMP code generation into the OMPIRBuilder. This patch also invalidates specifying target architectures with conflicting pointer sizes. Reviewers: jdoerfert Subscribers: aaron.ballman cfe-commits guansong llvm-commits sstefan1 yaxunl Tags: #OpenMP #Clang #LLVM Differential Revision: https://reviews.llvm.org/D88430 --- clang/lib/CodeGen/CGOpenMPRuntime.h | 5 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 590 +++--------------- clang/lib/CodeGen/CodeGenModule.h | 10 - clang/test/OpenMP/nvptx_parallel_codegen.cpp | 8 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 55 +- llvm/test/Transforms/OpenMP/add_attributes.ll | 338 +++++----- 6 files changed, 319 insertions(+), 687 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 41fa9f5345aa8..e39c2e11390e1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -306,6 +306,9 @@ class CGOpenMPRuntime { CodeGenModule &CGM; StringRef FirstSeparator, Separator; + /// An OpenMP-IR-Builder instance. + llvm::OpenMPIRBuilder OMPBuilder; + /// Constructor allowing to redefine the name separator for the variables. explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator); @@ -386,8 +389,6 @@ class CGOpenMPRuntime { llvm::Value *getCriticalRegionLock(StringRef CriticalName); private: - /// An OpenMP-IR-Builder instance. - llvm::OpenMPIRBuilder OMPBuilder; /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap OpenMPDebugLocMapTy; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index d9ef6c2a10789..dbd24d33cc376 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -28,96 +28,6 @@ using namespace CodeGen; using namespace llvm::omp; namespace { -enum OpenMPRTLFunctionNVPTX { - /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_kernel_init, - /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - OMPRTL_NVPTX__kmpc_kernel_deinit, - /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, - /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function); - OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function); - OMPRTL_NVPTX__kmpc_kernel_parallel, - /// Call to void __kmpc_kernel_end_parallel(); - OMPRTL_NVPTX__kmpc_kernel_end_parallel, - /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_serialized_parallel, - /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_end_serialized_parallel, - /// Call to int32_t __kmpc_shuffle_int32(int32_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int32, - /// Call to int64_t __kmpc_shuffle_int64(int64_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int64, - /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, - /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, void *global_buffer, int32_t num_of_records, void* - /// reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - /// *buffer, int idx, void *reduce_data)); - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, - /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); - OMPRTL_NVPTX__kmpc_end_reduce_nowait, - /// Call to void __kmpc_data_sharing_init_stack(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack, - /// Call to void __kmpc_data_sharing_init_stack_spmd(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, - /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, - /// int16_t UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, - /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t - /// UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_push_stack, - /// Call to void __kmpc_data_sharing_pop_stack(void *a); - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, - /// Call to void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - OMPRTL_NVPTX__kmpc_begin_sharing_variables, - /// Call to void __kmpc_end_sharing_variables(); - OMPRTL_NVPTX__kmpc_end_sharing_variables, - /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs) - OMPRTL_NVPTX__kmpc_get_shared_variables, - /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_parallel_level, - /// Call to int8_t __kmpc_is_spmd_exec_mode(); - OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, - /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - /// const void *buf, size_t size, int16_t is_shared, const void **res); - OMPRTL_NVPTX__kmpc_get_team_static_memory, - /// Call to void __kmpc_restore_team_static_memory(int16_t - /// isSPMDExecutionMode, int16_t is_shared); - OMPRTL_NVPTX__kmpc_restore_team_static_memory, - /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL__kmpc_barrier_simple_spmd, - /// Call to int32_t __kmpc_warp_active_thread_mask(void); - OMPRTL_NVPTX__kmpc_warp_active_thread_mask, - /// Call to void __kmpc_syncwarp(int32_t Mask); - OMPRTL_NVPTX__kmpc_syncwarp, -}; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { @@ -1243,13 +1153,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {getThreadLimit(CGF), Bld.getInt16(/*RequiresOMPRuntime=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_init), + Args); // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack)); emitGenericVarsProlog(CGF, WST.Loc); } @@ -1272,8 +1182,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF, // Signal termination condition. // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_deinit), + Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. @@ -1347,13 +1258,14 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader( /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), /*RequiresDataSharing=*/Bld.getInt16(0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init), + Args); if (RequiresFullRuntime) { // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd)); } CGF.EmitBranch(ExecuteBB); @@ -1379,9 +1291,9 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF, // DeInitialize the OMP state in the runtime; called by all active threads. llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2), + Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1415,7 +1327,7 @@ void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) { } void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, - WorkerFunctionState &WST) { + WorkerFunctionState &WST) { // // The workers enter this loop and wait for parallel work from the master. // When the master encounters a parallel region it sets up the work + variable @@ -1450,8 +1362,10 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {WorkFn.getPointer()}; - llvm::Value *Ret = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); + llvm::Value *Ret = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_parallel), + Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. @@ -1516,9 +1430,9 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // Signal end of parallel region. CGF.EmitBlock(TerminateBB); - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), - llvm::None); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel), + llvm::None); CGF.EmitBranch(BarrierBB); // All active and inactive workers wait at a barrier after parallel region. @@ -1533,328 +1447,6 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, clearLocThreadIdInsertPt(CGF); } -/// Returns specified OpenMP runtime function for the current OpenMP -/// implementation. Specialized for the NVPTX device. -/// \param Function OpenMP runtime function. -/// \return Specified function. -llvm::FunctionCallee -CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) { - llvm::FunctionCallee RTLFn = nullptr; - switch (static_cast(Function)) { - case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t - // RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_deinit: { - // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { - // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { - // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { - /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; - llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); - auto *FnTy = - llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { - /// Build void __kmpc_kernel_end_parallel(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_serialized_parallel: { - // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { - // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int32: { - // Build int32_t __kmpc_shuffle_int32(int32_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int64: { - // Build int64_t __kmpc_shuffle_int64(int64_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { - // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, - // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* - // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t - // lane_id, int16_t lane_offset, int16_t Algorithm Version), void - // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { - // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - // global_tid, void *global_buffer, int32_t num_of_records, void* - // reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t shortCircuit), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - // *buffer, int idx, void *reduce_data)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, - CGM.VoidPtrTy}; - auto *GlobalListFnTy = - llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { - /// Build void __kmpc_data_sharing_init_stack(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { - /// Build void __kmpc_data_sharing_init_stack_spmd(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { - // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, - // int16_t UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { - // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t - // UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { - // Build void __kmpc_data_sharing_pop_stack(void *a); - llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, - /*Name=*/"__kmpc_data_sharing_pop_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { - /// Build void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_end_sharing_variables: { - /// Build void __kmpc_end_sharing_variables(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_get_shared_variables: { - /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_parallel_level: { - // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); - break; - } - case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { - // Build int8_t __kmpc_is_spmd_exec_mode(); - auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); - break; - } - case OMPRTL_NVPTX__kmpc_get_team_static_memory: { - // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - // const void *buf, size_t size, int16_t is_shared, const void **res); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, - CGM.Int16Ty, CGM.VoidPtrPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); - break; - } - case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { - // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, - // int16_t is_shared); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); - break; - } - case OMPRTL__kmpc_barrier: { - // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = - CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - break; - } - case OMPRTL__kmpc_barrier_simple_spmd: { - // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateConvergentRuntimeFunction( - FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { - // Build int32_t __kmpc_warp_active_thread_mask(void); - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); - break; - } - case OMPRTL_NVPTX__kmpc_syncwarp: { - // Build void __kmpc_syncwarp(kmp_int32 Mask); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); - break; - } - } - return RTLFn; -} - void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t, @@ -2157,12 +1749,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); IsTTD = Bld.CreateIsNull(PL); } - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2196,8 +1790,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2259,9 +1853,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, CGM.Int16Ty, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_get_team_static_memory), - GlobalRecordSizeArg); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_team_static_memory), + GlobalRecordSizeArg); GlobalizedRecords.back().Buffer = StaticGlobalized; GlobalizedRecords.back().RecSize = RecSize; GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; @@ -2288,10 +1883,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - IsInTTDRegion - ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack - : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack + : OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2390,8 +1985,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -2419,7 +2014,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, for (llvm::Value *Addr : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), Addr); } if (I->getSecond().GlobalRecordAddr) { @@ -2434,8 +2030,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(NonSPMDBB); CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { @@ -2456,14 +2052,15 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), IsInSharedMemory}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_restore_team_static_memory), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory), Args); } } else { - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); } } } @@ -2535,9 +2132,11 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2553,7 +2152,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( // Prepare for parallel region. Indicate the outlined function. llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel), Args); // Create a private scope that will globalize the arguments @@ -2570,9 +2170,10 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *DataSharingArgs[] = { SharedArgsPtr, llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_begin_sharing_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables), + DataSharingArgs); // Store variable address in a list of references to pass to workers. unsigned Idx = 0; @@ -2606,8 +2207,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( syncCTAThreads(CGF); if (!CapturedVars.empty()) - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_sharing_variables)); // Remember for post-processing in worker loop. Work.emplace_back(WFn); @@ -2631,8 +2232,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2640,7 +2242,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); Bld.CreateCondBr(Res, SeqBB, MasterBB); @@ -2704,9 +2307,11 @@ void CGOpenMPRuntimeGPU::emitSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2736,9 +2341,9 @@ void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) { llvm::ConstantPointerNull::get( cast(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); - Call->setConvergent(); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd), + Args); } void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, @@ -2752,9 +2357,10 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); - Call->setConvergent(); + + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier), + Args); } void CGOpenMPRuntimeGPU::emitCriticalRegion( @@ -2770,8 +2376,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); // Get the mask of active threads in the warp. - llvm::Value *Mask = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); + llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = RT.getGPUThreadID(CGF); @@ -2813,8 +2419,9 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); // Reconverge active threads in the warp. - (void)CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); + (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_syncwarp), + Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2864,14 +2471,15 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; CGOpenMPRuntimeGPU &RT = *(static_cast(&CGM.getOpenMPRuntime())); + llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder(); CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); assert(Size.getQuantity() <= 8 && "Unsupported bitwidth in shuffle instruction."); - OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 - ? OMPRTL_NVPTX__kmpc_shuffle_int32 - : OMPRTL_NVPTX__kmpc_shuffle_int64; + RuntimeFunction ShuffleFn = Size.getQuantity() <= 4 + ? OMPRTL___kmpc_shuffle_int32 + : OMPRTL___kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. QualType CastTy = CGF.getContext().getIntTypeForBitwidth( @@ -2881,7 +2489,8 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( - RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn), + {ElemCast, Offset, WarpSize}); return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); } @@ -4391,8 +4000,8 @@ void CGOpenMPRuntimeGPU::emitReduction( InterWarpCopyFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2), Args); } else { assert(TeamsReduction && "expected teams reduction."); @@ -4441,8 +4050,8 @@ void CGOpenMPRuntimeGPU::emitReduction( BufferToGlobalRedFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -4477,7 +4086,8 @@ void CGOpenMPRuntimeGPU::emitReduction( RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), EndArgs); RCG.setAction(Action); RCG(CGF); @@ -4488,7 +4098,7 @@ void CGOpenMPRuntimeGPU::emitReduction( const VarDecl * CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD, - const VarDecl *NativeParam) const { + const VarDecl *NativeParam) const { if (!NativeParam->getType()->isReferenceType()) return NativeParam; QualType ArgType = NativeParam->getType(); @@ -4638,9 +4248,9 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_shared_variables), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 19085b582f5a0..088ed2830fb81 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1068,16 +1068,6 @@ class CodeGenModule : public CodeGenTypeCache { llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false, bool AssumeConvergent = false); - /// Create or return a runtime function declaration with the specified type - /// and name. This will automatically add the convergent attribute to the - /// function declaration. - llvm::FunctionCallee CreateConvergentRuntimeFunction( - llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeList ExtraAttrs = llvm::AttributeList(), - bool Local = false) { - return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true); - } - /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index ad25e0d775d12..bd9c988d46e7a 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -91,7 +91,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] -// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] +// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 @@ -321,10 +321,10 @@ int bar(int n){ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) // CHECK: ret void -// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]] +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]] // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}( @@ -377,6 +377,6 @@ int bar(int n){ // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]] // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]] -// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}} +// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}} #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index e93f836ea3fad..ff5e69df32616 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -220,6 +220,9 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr) __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr) __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /* kmp_task_t */ VoidPtr) +__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16) +__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32) +__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr) #undef __OMP_FUNCTION_TYPE #undef OMP_FUNCTION_TYPE @@ -311,8 +314,6 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) -__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr, Int32, /* kmp_task_affinity_info_t */ VoidPtr) @@ -518,17 +519,42 @@ __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// OpenMP Device runtime functions +__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16) +__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16) +__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16) +__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16) +__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr) +__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32) +__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr, + GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) + +__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) __OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, - Int16) +__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) + +__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr) - -/// Note that device runtime functions (in the following) do not necessarily -/// need attributes as we expect to see the definitions. -__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) -__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) +__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) +__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32) +__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) +__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy, + Int16, VoidPtrPtr) +__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) +__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, ) +__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) __OMP_RTL(__last, false, Void, ) @@ -577,8 +603,8 @@ __OMP_ATTRS_SET(DefaultAttrs, __OMP_ATTRS_SET(BarrierAttrs, OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind)) - : AttributeSet(EnumAttr(NoUnwind))) + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) + : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) __OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, OptimisticAttributes @@ -650,6 +676,11 @@ __OMP_ATTRS_SET(ReturnAlignedPtrAttrs, __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index e92447d79feac..cf1bd246d2b29 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -888,313 +888,313 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: declare dso_local i32 @omp_pause_resource_all(i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0 +; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0 +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 +; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0 +; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0 +; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 +; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0 +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 +; CHECK-NEXT: declare void @__tgt_register_requires(i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) @@ -1212,52 +1212,52 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority() @@ -1326,7 +1326,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device() @@ -1356,25 +1356,25 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2 +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1 +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*) @@ -1419,7 +1419,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) @@ -1427,7 +1427,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1451,13 +1451,13 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1466,22 +1466,22 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1523,10 +1523,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1535,10 +1535,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1598,7 +1598,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1622,7 +1622,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1634,16 +1634,16 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn From afc277b0ed0dcd9fbbde6015bbdf289349fb2104 Mon Sep 17 00:00:00 2001 From: David Tenty Date: Tue, 29 Sep 2020 11:30:28 -0400 Subject: [PATCH 189/544] [AIX][Clang][Driver] Link libm in c++ mode since that is the normal behaviour of other compilers on the platform. Reviewed By: hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D88500 --- clang/lib/Driver/ToolChains/AIX.cpp | 3 +++ clang/test/Driver/aix-ld.c | 25 ++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index b56ddf55cb30d..351b34e8bf90f 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -162,6 +162,9 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_pthreads, options::OPT_pthread)) CmdArgs.push_back("-lpthreads"); + if (D.CCCIsCXX()) + CmdArgs.push_back("-lm"); + CmdArgs.push_back("-lc"); } diff --git a/clang/test/Driver/aix-ld.c b/clang/test/Driver/aix-ld.c index 224e355aac136..7ccbeff3b8b64 100644 --- a/clang/test/Driver/aix-ld.c +++ b/clang/test/Driver/aix-ld.c @@ -20,6 +20,7 @@ // CHECK-LD32: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-NOT: "-lc++" // CHECK-LD32: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-NOT: "-lm" // CHECK-LD32: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. @@ -41,6 +42,7 @@ // CHECK-LD64: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NOT: "-lc++" // CHECK-LD64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-NOT: "-lm" // CHECK-LD64: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Enable POSIX thread support. @@ -64,6 +66,7 @@ // CHECK-LD32-PTHREAD-NOT: "-lc++" // CHECK-LD32-PTHREAD: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" // CHECK-LD32-PTHREAD: "-lpthreads" +// CHECK-LD32-PTHREAD-NOT: "-lm" // CHECK-LD32-PTHREAD: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. POSIX thread alias. @@ -87,6 +90,7 @@ // CHECK-LD64-PTHREAD-NOT: "-lc++" // CHECK-LD64-PTHREAD: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" // CHECK-LD64-PTHREAD: "-lpthreads" +// CHECK-LD64-PTHREAD-NOT: "-lm" // CHECK-LD64-PTHREAD: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Enable profiling. @@ -109,6 +113,7 @@ // CHECK-LD32-PROF: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-PROF-NOT: "-lc++" // CHECK-LD32-PROF: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-PROF-NOT: "-lm" // CHECK-LD32-PROF: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. Enable g-profiling. @@ -131,6 +136,7 @@ // CHECK-LD64-GPROF: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-GPROF-NOT: "-lc++" // CHECK-LD64-GPROF: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-GPROF-NOT: "-lm" // CHECK-LD64-GPROF: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Static linking. @@ -153,6 +159,7 @@ // CHECK-LD32-STATIC: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-STATIC-NOT: "-lc++" // CHECK-LD32-STATIC: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-STATIC-NOT: "-lm" // CHECK-LD32-STATIC: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. Library search path. @@ -176,6 +183,7 @@ // CHECK-LD32-LIBP: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-LIBP-NOT: "-lc++" // CHECK-LD32-LIBP: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-LIBP-NOT: "-lm" // CHECK-LD32-LIBP: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. nostdlib. @@ -200,6 +208,7 @@ // CHECK-LD32-NO-STD-LIB-NOT: "-lc++" // CHECK-LD32-NO-STD-LIB-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" // CHECK-LD32-NO-STD-LIB-NOT: "-lpthreads" +// CHECK-LD32-NO-STD-LIB-NOT: "-lm" // CHECK-LD32-NO-STD-LIB-NOT: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. nodefaultlibs. @@ -224,6 +233,7 @@ // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lc++" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lpthreads" +// CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lm" // CHECK-LD64-NO-DEFAULT-LIBS-NOT: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. 'bcdtors' and argument order. @@ -247,6 +257,7 @@ // CHECK-LD32-CXX-ARG-ORDER-NOT: "-bcdtors:all:0:s" // CHECK-LD32-CXX-ARG-ORDER: "-lc++" // CHECK-LD32-CXX-ARG-ORDER: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-CXX-ARG-ORDER: "-lm" // CHECK-LD32-CXX-ARG-ORDER: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. lc++ and lc order. @@ -266,6 +277,7 @@ // CHECK-LD32-CXX-ARG-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-CXX-ARG-LCXX: "-lc++" // CHECK-LD32-CXX-ARG-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-CXX-ARG-LCXX: "-lm" // CHECK-LD32-CXX-ARG-LCXX: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. lc++ and lc order. @@ -285,6 +297,7 @@ // CHECK-LD64-CXX-ARG-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-CXX-ARG-LCXX: "-lc++" // CHECK-LD64-CXX-ARG-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-CXX-ARG-LCXX: "-lm" // CHECK-LD64-CXX-ARG-LCXX: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. -nodefaultlibs. @@ -305,6 +318,7 @@ // CHECK-LD32-NODEFLIB-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-NODEFLIB-LCXX-NOT: "-lc++" // CHECK-LD32-NODEFLIB-LCXX-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-NODEFLIB-LCXX-NOT: "-lm" // CHECK-LD32-NODEFLIB-LCXX-NOT: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. -nodefaultlibs. @@ -325,6 +339,7 @@ // CHECK-LD64-NODEFLIB-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NODEFLIB-LCXX-NOT: "-lc++" // CHECK-LD64-NODEFLIB-LCXX-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-NODEFLIB-LCXX-NOT: "-lm" // CHECK-LD64-NODEFLIB-LCXX-NOT: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. -nostdlib. @@ -345,6 +360,7 @@ // CHECK-LD32-NOSTDLIB-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-NOSTDLIB-LCXX-NOT: "-lc++" // CHECK-LD32-NOSTDLIB-LCXX-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-NOSTDLIB-LCXX-NOT: "-lm" // CHECK-LD32-NOSTDLIB-LCXX-NOT: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. -nostdlib. @@ -365,6 +381,7 @@ // CHECK-LD64-NOSTDLIB-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NOSTDLIB-LCXX-NOT: "-lc++" // CHECK-LD64-NOSTDLIB-LCXX-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-NOSTDLIB-LCXX-NOT: "-lm" // CHECK-LD64-NOSTDLIB-LCXX-NOT: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. -nostdlib++. @@ -386,6 +403,7 @@ // CHECK-LD32-NOSTDLIBXX-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-NOSTDLIBXX-LCXX-NOT: "-lc++" // CHECK-LD32-NOSTDLIBXX-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-NOSTDLIBXX-LCXX: "-lm" // CHECK-LD32-NOSTDLIBXX-LCXX: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. -nostdlib++. @@ -406,6 +424,7 @@ // CHECK-LD64-NOSTDLIBXX-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NOSTDLIBXX-LCXX-NOT: "-lc++" // CHECK-LD64-NOSTDLIBXX-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-NOSTDLIBXX-LCXX: "-lm" // CHECK-LD64-NOSTDLIBXX-LCXX: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 32-bit. -nostartfiles. @@ -424,8 +443,9 @@ // CHECK-LD32-NOSTARTFILES-LCXX-NOT: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o" // CHECK-LD32-NOSTARTFILES-LCXX-NOT: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o" // CHECK-LD32-NOSTARTFILES-LCXX: "-L[[SYSROOT]]/usr/lib" -// CHECK-LD32-NOSTARTFILES-LCXX "-lc++" +// CHECK-LD32-NOSTARTFILES-LCXX: "-lc++" // CHECK-LD32-NOSTARTFILES-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-NOSTARTFILES-LCXX: "-lm" // CHECK-LD32-NOSTARTFILES-LCXX: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. -nostartfiles. @@ -446,6 +466,7 @@ // CHECK-LD64-NOSTARTFILES-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-NOSTARTFILES-LCXX: "-lc++" // CHECK-LD64-NOSTARTFILES-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-NOSTARTFILES-LCXX: "-lm" // CHECK-LD64-NOSTARTFILES-LCXX: "-lc" // Check powerpc-ibm-aix7.1.0.0, 32-bit. -stdlib=libstdc++ invokes fatal error. @@ -483,6 +504,7 @@ // CHECK-LD32-SHARED: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-SHARED: "-lc++" // CHECK-LD32-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" +// CHECK-LD32-SHARED: "-lm" // CHECK-LD32-SHARED: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. -shared. @@ -505,4 +527,5 @@ // CHECK-LD64-SHARED: "-L[[SYSROOT]]/usr/lib" // CHECK-LD64-SHARED: "-lc++" // CHECK-LD64-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc64.a" +// CHECK-LD64-SHARED: "-lm" // CHECK-LD64-SHARED: "-lc" From 8955950c121c97a686310991203c89ba14c90b82 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 30 Sep 2020 10:37:00 -0700 Subject: [PATCH 190/544] Exception support for basic block sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is part of the Propeller framework to do post link code layout optimizations. Please see the RFC here: https://groups.google.com/forum/#!msg/llvm-dev/ef3mKzAdJ7U/1shV64BYBAAJ and the detailed RFC doc here: https://github.com/google/llvm-propeller/blob/plo-dev/Propeller_RFC.pdf This patch provides exception support for basic block sections by splitting the call-site table into call-site ranges corresponding to different basic block sections. Still all landing pads must reside in the same basic block section (which is guaranteed by the the core basic block section patch D73674 (ExceptionSection) ). Each call-site table will refer to the landing pad fragment by explicitly specifying @LPstart (which is omitted in the normal non-basic-block section case). All these call-site tables will share their action and type tables. The C++ ABI somehow assumes that no landing pads point directly to LPStart (which works in the normal case since the function begin is never a landing pad), and uses LP.offset = 0 to specify no landing pad. In the case of basic block section where one section contains all the landing pads, the landing pad offset relative to LPStart could actually be zero. Thus, we avoid zero-offset landing pads by inserting a **nop** operation as the first non-CFI instruction in the exception section. **Background on Exception Handling in C++ ABI** https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf Compiler emits an exception table for every function. When an exception is thrown, the stack unwinding library queries the unwind table (which includes the start and end of each function) to locate the exception table for that function. The exception table includes a call site table for the function, which is used to guide the exception handling runtime to take the appropriate action upon an exception. Each call site record in this table is structured as follows: | CallSite | --> Position of the call site (relative to the function entry) | CallSite length | --> Length of the call site. | Landing Pad | --> Position of the landing pad (relative to the landing pad fragment’s begin label) | Action record offset | --> Position of the first action record The call site records partition a function into different pieces and describe what action must be taken for each callsite. The callsite fields are relative to the start of the function (as captured in the unwind table). The landing pad entry is a reference into the function and corresponds roughly to the catch block of a try/catch statement. When execution resumes at a landing pad, it receives an exception structure and a selector value corresponding to the type of the exception thrown, and executes similar to a switch-case statement. The landing pad field is relative to the beginning of the procedure fragment which includes all the landing pads (@LPStart). The C++ ABI requires all landing pads to be in the same fragment. Nonetheless, without basic block sections, @LPStart is the same as the function @Start (found in the unwind table) and can be omitted. The action record offset is an index into the action table which includes information about which exception types are caught. **C++ Exceptions with Basic Block Sections** Basic block sections break the contiguity of a function fragment. Therefore, call sites must be specified relative to the beginning of the basic block section. Furthermore, the unwinding library should be able to find the corresponding callsites for each section. To do so, the .cfi_lsda directive for a section must point to the range of call-sites for that section. This patch introduces a new **CallSiteRange** structure which specifies the range of call-sites which correspond to every section: `struct CallSiteRange { // Symbol marking the beginning of the precedure fragment. MCSymbol *FragmentBeginLabel = nullptr; // Symbol marking the end of the procedure fragment. MCSymbol *FragmentEndLabel = nullptr; // LSDA symbol for this call-site range. MCSymbol *ExceptionLabel = nullptr; // Index of the first call-site entry in the call-site table which // belongs to this range. size_t CallSiteBeginIdx = 0; // Index just after the last call-site entry in the call-site table which // belongs to this range. size_t CallSiteEndIdx = 0; // Whether this is the call-site range containing all the landing pads. bool IsLPRange = false; };` With N basic-block-sections, the call-site table is partitioned into N call-site ranges. Conceptually, we emit the call-site ranges for sections sequentially in the exception table as if each section has its own exception table. In the example below, two sections result in the two call site ranges (denoted by LSDA1 and LSDA2) placed next to each other. However, their call-sites will refer to records in the shared Action Table. We also emit the header fields (@LPStart and CallSite Table Length) for each call site range in order to place the call site ranges in separate LSDAs. We note that with -basic-block-sections, The CallSiteTableLength will not actually represent the length of the call site table, but rather the reference to the action table. Since the only purpose of this field is to locate the action table, correctness is guaranteed. Finally, every call site range has one @LPStart pointer so the landing pads of each section must all reside in one section (not necessarily the same section). To make this easier, we decide to place all landing pads of the function in one section (hence the `IsLPRange` field in CallSiteRange). | @LPStart | ---> Landing pad fragment ( LSDA1 points here) | CallSite Table Length | ---> Used to find the action table. | CallSites | | … | | … | | @LPStart | ---> Landing pad fragment ( LSDA2 points here) | CallSite Table Length | | CallSites | | … | | … | … … | Action Table | | Types Table | Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D73739 --- llvm/include/llvm/CodeGen/AsmPrinter.h | 11 +- llvm/include/llvm/CodeGen/AsmPrinterHandler.h | 3 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 11 +- .../CodeGen/AsmPrinter/DwarfCFIException.cpp | 7 +- llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 268 +++++++++++++----- llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 43 ++- llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 1 + llvm/lib/CodeGen/AsmPrinter/WasmException.h | 1 + llvm/lib/CodeGen/BasicBlockSections.cpp | 21 ++ llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2 +- .../X86/gcc_except_table_bb_sections.ll | 166 +++++++++++ ...able_bb_sections_ehpad_groups_with_cold.ll | 96 +++++++ 12 files changed, 541 insertions(+), 89 deletions(-) create mode 100644 llvm/test/CodeGen/X86/gcc_except_table_bb_sections.ll create mode 100644 llvm/test/CodeGen/X86/gcc_except_table_bb_sections_ehpad_groups_with_cold.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 11ba36aee5a80..3056568ccf98c 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -141,7 +141,11 @@ class AsmPrinter : public MachineFunctionPass { private: MCSymbol *CurrentFnEnd = nullptr; - MCSymbol *CurExceptionSym = nullptr; + + /// Map a basic block section ID to the exception symbol associated with that + /// section. Map entries are assigned and looked up via + /// AsmPrinter::getMBBExceptionSym. + DenseMap MBBSectionExceptionSyms; // The symbol used to represent the start of the current BB section of the // function. This is used to calculate the size of the BB section. @@ -238,7 +242,10 @@ class AsmPrinter : public MachineFunctionPass { MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } MCSymbol *getFunctionEnd() const { return CurrentFnEnd; } - MCSymbol *getCurExceptionSym(); + + // Return the exception symbol associated with the MBB section containing a + // given basic block. + MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB); /// Return information about object file lowering. const TargetLoweringObjectFile &getObjFileLowering() const; diff --git a/llvm/include/llvm/CodeGen/AsmPrinterHandler.h b/llvm/include/llvm/CodeGen/AsmPrinterHandler.h index 899d067d03f00..b9837dc168e90 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinterHandler.h +++ b/llvm/include/llvm/CodeGen/AsmPrinterHandler.h @@ -24,7 +24,8 @@ class MachineFunction; class MachineInstr; class MCSymbol; -typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm); +typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm, + const MachineBasicBlock *MBB); /// Collects and handles AsmPrinter objects required to build debug /// or EH information. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5b4e347cf6c85..613e7ebff2dfd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1763,10 +1763,11 @@ bool AsmPrinter::doFinalization(Module &M) { return false; } -MCSymbol *AsmPrinter::getCurExceptionSym() { - if (!CurExceptionSym) - CurExceptionSym = createTempSymbol("exception"); - return CurExceptionSym; +MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) { + auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum()); + if (Res.second) + Res.first->second = createTempSymbol("exception"); + return Res.first->second; } void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { @@ -1793,7 +1794,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurrentSectionBeginSym = nullptr; MBBSectionRanges.clear(); - CurExceptionSym = nullptr; + MBBSectionExceptionSyms.clear(); bool NeedsLocalForSize = MAI->needsLocalForSize(); if (F.hasFnAttribute("patchable-function-entry") || F.hasFnAttribute("function-instrument") || diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 11ed1062f77e4..c20ac6040aef7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -81,8 +81,9 @@ void DwarfCFIException::endModule() { } } -static MCSymbol *getExceptionSym(AsmPrinter *Asm) { - return Asm->getCurExceptionSym(); +static MCSymbol *getExceptionSym(AsmPrinter *Asm, + const MachineBasicBlock *MBB) { + return Asm->getMBBExceptionSym(*MBB); } void DwarfCFIException::beginFunction(const MachineFunction *MF) { @@ -161,7 +162,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, // Provide LSDA information. if (shouldEmitLSDA) - Asm->OutStreamer->emitCFILsda(ESP(Asm), TLOF.getLSDAEncoding()); + Asm->OutStreamer->emitCFILsda(ESP(Asm, MBB), TLOF.getLSDAEncoding()); } /// endFunction - Gather and emit post-function exception information. diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 0691c8bf10788..4bf14af0721ec 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -214,10 +214,25 @@ void EHStreamer::computePadMap( /// the landing pad and the action. Calls marked 'nounwind' have no entry and /// must not be contained in the try-range of any entry - they form gaps in the /// table. Entries must be ordered by try-range address. -void EHStreamer:: -computeCallSiteTable(SmallVectorImpl &CallSites, - const SmallVectorImpl &LandingPads, - const SmallVectorImpl &FirstActions) { +/// +/// Call-sites are split into one or more call-site ranges associated with +/// different sections of the function. +/// +/// - Without -basic-block-sections, all call-sites are grouped into one +/// call-site-range corresponding to the function section. +/// +/// - With -basic-block-sections, one call-site range is created for each +/// section, with its FragmentBeginLabel and FragmentEndLabel respectively +// set to the beginning and ending of the corresponding section and its +// ExceptionLabel set to the exception symbol dedicated for this section. +// Later, one LSDA header will be emitted for each call-site range with its +// call-sites following. The action table and type info table will be +// shared across all ranges. +void EHStreamer::computeCallSiteTable( + SmallVectorImpl &CallSites, + SmallVectorImpl &CallSiteRanges, + const SmallVectorImpl &LandingPads, + const SmallVectorImpl &FirstActions) { RangeMapType PadMap; computePadMap(LandingPads, PadMap); @@ -235,6 +250,21 @@ computeCallSiteTable(SmallVectorImpl &CallSites, // Visit all instructions in order of address. for (const auto &MBB : *Asm->MF) { + if (&MBB == &Asm->MF->front() || MBB.isBeginSection()) { + // We start a call-site range upon function entry and at the beginning of + // every basic block section. + CallSiteRanges.push_back( + {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel, + Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel, + Asm->getMBBExceptionSym(MBB), CallSites.size()}); + PreviousIsInvoke = false; + SawPotentiallyThrowing = false; + LastLabel = nullptr; + } + + if (MBB.isEHPad()) + CallSiteRanges.back().IsLPRange = true; + for (const auto &MI : MBB) { if (!MI.isEHLabel()) { if (MI.isCall()) @@ -306,13 +336,22 @@ computeCallSiteTable(SmallVectorImpl &CallSites, PreviousIsInvoke = true; } } - } - // If some instruction between the previous try-range and the end of the - // function may throw, create a call-site entry with no landing pad for the - // region following the try-range. - if (SawPotentiallyThrowing && !IsSJLJ) - CallSites.push_back({LastLabel, Asm->getFunctionEnd(), nullptr, 0}); + // We end the call-site range upon function exit and at the end of every + // basic block section. + if (&MBB == &Asm->MF->back() || MBB.isEndSection()) { + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for + // the region following the try-range. + if (SawPotentiallyThrowing && !IsSJLJ) { + CallSiteEntry Site = {LastLabel, CallSiteRanges.back().FragmentEndLabel, + nullptr, 0}; + CallSites.push_back(Site); + SawPotentiallyThrowing = false; + } + CallSiteRanges.back().CallSiteEndIdx = CallSites.size(); + } + } } /// Emit landing pads and actions. @@ -362,9 +401,13 @@ MCSymbol *EHStreamer::emitExceptionTable() { SmallVector FirstActions; computeActionsTable(LandingPads, Actions, FirstActions); - // Compute the call-site table. + // Compute the call-site table and call-site ranges. Normally, there is only + // one call-site-range which covers the whole funciton. With + // -basic-block-sections, there is one call-site-range per basic block + // section. SmallVector CallSites; - computeCallSiteTable(CallSites, LandingPads, FirstActions); + SmallVector CallSiteRanges; + computeCallSiteTable(CallSites, CallSiteRanges, LandingPads, FirstActions); bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm; @@ -424,35 +467,49 @@ MCSymbol *EHStreamer::emitExceptionTable() { Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); Asm->OutStreamer->emitLabel(GCCETSym); - Asm->OutStreamer->emitLabel(Asm->getCurExceptionSym()); - - // Emit the LSDA header. - Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - Asm->emitEncodingByte(TTypeEncoding, "@TType"); + MCSymbol *CstEndLabel = Asm->createTempSymbol( + CallSiteRanges.size() > 1 ? "action_table_base" : "cst_end"); MCSymbol *TTBaseLabel = nullptr; - if (HaveTTData) { - // N.B.: There is a dependency loop between the size of the TTBase uleb128 - // here and the amount of padding before the aligned type table. The - // assembler must sometimes pad this uleb128 or insert extra padding before - // the type table. See PR35809 or GNU as bug 4029. - MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + if (HaveTTData) TTBaseLabel = Asm->createTempSymbol("ttbase"); - Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); - Asm->OutStreamer->emitLabel(TTBaseRefLabel); - } - bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + // Helper for emitting references (offsets) for type table and the end of the + // call-site table (which marks the beginning of the action table). + // * For Itanium, these references will be emitted for every callsite range. + // * For SJLJ and Wasm, they will be emitted only once in the LSDA header. + auto EmitTypeTableRefAndCallSiteTableEndRef = [&]() { + Asm->emitEncodingByte(TTypeEncoding, "@TType"); + if (HaveTTData) { + // N.B.: There is a dependency loop between the size of the TTBase uleb128 + // here and the amount of padding before the aligned type table. The + // assembler must sometimes pad this uleb128 or insert extra padding + // before the type table. See PR35809 or GNU as bug 4029. + MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); + Asm->OutStreamer->emitLabel(TTBaseRefLabel); + } - // Emit the landing pad call site table. - MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); - MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end"); - Asm->emitEncodingByte(CallSiteEncoding, "Call site"); - Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); - Asm->OutStreamer->emitLabel(CstBeginLabel); + // The Action table follows the call-site table. So we emit the + // label difference from here (start of the call-site table for SJLJ and + // Wasm, and start of a call-site range for Itanium) to the end of the + // whole call-site table (end of the last call-site range for Itanium). + MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); + Asm->emitEncodingByte(CallSiteEncoding, "Call site"); + Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); + Asm->OutStreamer->emitLabel(CstBeginLabel); + }; // SjLj / Wasm Exception handling if (IsSJLJ || IsWasm) { + Asm->OutStreamer->emitLabel(Asm->getMBBExceptionSym(Asm->MF->front())); + + // emit the LSDA header. + Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + EmitTypeTableRefAndCallSiteTableEndRef(); + unsigned idx = 0; for (SmallVectorImpl::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { @@ -477,6 +534,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { } Asm->emitULEB128(S.Action); } + Asm->OutStreamer->emitLabel(CstEndLabel); } else { // Itanium LSDA exception handling @@ -498,50 +556,124 @@ MCSymbol *EHStreamer::emitExceptionTable() { // A missing entry in the call-site table indicates that a call is not // supposed to throw. + assert(CallSiteRanges.size() != 0 && "No call-site ranges!"); + + // There should be only one call-site range which includes all the landing + // pads. Find that call-site range here. + const CallSiteRange *LandingPadRange = nullptr; + for (const CallSiteRange &CSRange : CallSiteRanges) { + if (CSRange.IsLPRange) { + assert(LandingPadRange == nullptr && + "All landing pads must be in a single callsite range."); + LandingPadRange = &CSRange; + } + } + + // The call-site table is split into its call-site ranges, each being + // emitted as: + // [ LPStartEncoding | LPStart ] + // [ TypeTableEncoding | TypeTableOffset ] + // [ CallSiteEncoding | CallSiteTableEndOffset ] + // cst_begin -> { call-site entries contained in this range } + // + // and is followed by the next call-site range. + // + // For each call-site range, CallSiteTableEndOffset is computed as the + // difference between cst_begin of that range and the last call-site-table's + // end label. This offset is used to find the action table. + unsigned Entry = 0; - for (SmallVectorImpl::const_iterator - I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { - const CallSiteEntry &S = *I; + for (const CallSiteRange &CSRange : CallSiteRanges) { + if (CSRange.CallSiteBeginIdx != 0) { + // Align the call-site range for all ranges except the first. The + // first range is already aligned due to the exception table alignment. + Asm->emitAlignment(Align(4)); + } + Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel); + + // Emit the LSDA header. + // If only one call-site range exists, LPStart is omitted as it is the + // same as the function entry. + if (CallSiteRanges.size() == 1) { + Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + } else if (!Asm->isPositionIndependent()) { + // For more than one call-site ranges, LPStart must be explicitly + // specified. + // For non-PIC we can simply use the absolute value. + Asm->emitEncodingByte(dwarf::DW_EH_PE_absptr, "@LPStart"); + Asm->OutStreamer->emitSymbolValue(LandingPadRange->FragmentBeginLabel, + Asm->MAI->getCodePointerSize()); + } else { + // For PIC mode, we Emit a PC-relative address for LPStart. + Asm->emitEncodingByte(dwarf::DW_EH_PE_pcrel, "@LPStart"); + MCContext &Context = Asm->OutStreamer->getContext(); + MCSymbol *Dot = Context.createTempSymbol(); + Asm->OutStreamer->emitLabel(Dot); + Asm->OutStreamer->emitValue( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(LandingPadRange->FragmentBeginLabel, + Context), + MCSymbolRefExpr::create(Dot, Context), Context), + Asm->MAI->getCodePointerSize()); + } + + EmitTypeTableRefAndCallSiteTableEndRef(); - MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx; + CallSiteIdx != CSRange.CallSiteEndIdx; ++CallSiteIdx) { + const CallSiteEntry &S = CallSites[CallSiteIdx]; - // Offset of the call site relative to the start of the procedure. - if (VerboseAsm) - Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->emitCallSiteOffset(S.BeginLabel, EHFuncBeginSym, CallSiteEncoding); - if (VerboseAsm) - Asm->OutStreamer->AddComment(Twine(" Call between ") + - S.BeginLabel->getName() + " and " + - S.EndLabel->getName()); - Asm->emitCallSiteOffset(S.EndLabel, S.BeginLabel, CallSiteEncoding); + MCSymbol *EHFuncBeginSym = CSRange.FragmentBeginLabel; + MCSymbol *EHFuncEndSym = CSRange.FragmentEndLabel; - // Offset of the landing pad relative to the start of the procedure. - if (!S.LPad) { + MCSymbol *BeginLabel = S.BeginLabel; + if (!BeginLabel) + BeginLabel = EHFuncBeginSym; + MCSymbol *EndLabel = S.EndLabel; + if (!EndLabel) + EndLabel = EHFuncEndSym; + + // Offset of the call site relative to the start of the procedure. if (VerboseAsm) - Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->emitCallSiteValue(0, CallSiteEncoding); - } else { + Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + + " <<"); + Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); if (VerboseAsm) - Asm->OutStreamer->AddComment(Twine(" jumps to ") + - S.LPad->LandingPadLabel->getName()); - Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, - CallSiteEncoding); - } + Asm->OutStreamer->AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); + Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); + + // Offset of the landing pad relative to the start of the landing pad + // fragment. + if (!S.LPad) { + if (VerboseAsm) + Asm->OutStreamer->AddComment(" has no landing pad"); + Asm->emitCallSiteValue(0, CallSiteEncoding); + } else { + if (VerboseAsm) + Asm->OutStreamer->AddComment(Twine(" jumps to ") + + S.LPad->LandingPadLabel->getName()); + Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, + LandingPadRange->FragmentBeginLabel, + CallSiteEncoding); + } - // Offset of the first associated action record, relative to the start of - // the action table. This value is biased by 1 (1 indicates the start of - // the action table), and 0 indicates that there are no actions. - if (VerboseAsm) { - if (S.Action == 0) - Asm->OutStreamer->AddComment(" On action: cleanup"); - else - Asm->OutStreamer->AddComment(" On action: " + - Twine((S.Action - 1) / 2 + 1)); + // Offset of the first associated action record, relative to the start + // of the action table. This value is biased by 1 (1 indicates the start + // of the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer->AddComment(" On action: cleanup"); + else + Asm->OutStreamer->AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->emitULEB128(S.Action); } - Asm->emitULEB128(S.Action); } + Asm->OutStreamer->emitLabel(CstEndLabel); } - Asm->OutStreamer->emitLabel(CstEndLabel); // Emit the Action Table. int Entry = 0; @@ -596,7 +728,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { const std::vector &TypeInfos = MF->getTypeInfos(); const std::vector &FilterIds = MF->getFilterIds(); - bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); int Entry = 0; // Emit the Catch TypeInfos. diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index e62cf17a05d4b..234e62506a563 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -69,23 +69,48 @@ class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { unsigned Action; }; + /// Structure describing a contiguous range of call-sites which reside + /// in the same procedure fragment. With -fbasic-block-sections, there will + /// be one call site range per basic block section. Otherwise, we will have + /// one call site range containing all the call sites in the function. + struct CallSiteRange { + // Symbol marking the beginning of the precedure fragment. + MCSymbol *FragmentBeginLabel = nullptr; + // Symbol marking the end of the procedure fragment. + MCSymbol *FragmentEndLabel = nullptr; + // LSDA symbol for this call-site range. + MCSymbol *ExceptionLabel = nullptr; + // Index of the first call-site entry in the call-site table which + // belongs to this range. + size_t CallSiteBeginIdx = 0; + // Index just after the last call-site entry in the call-site table which + // belongs to this range. + size_t CallSiteEndIdx = 0; + // Whether this is the call-site range containing all the landing pads. + bool IsLPRange = false; + }; + /// Compute the actions table and gather the first action index for each /// landing pad site. - void computeActionsTable(const SmallVectorImpl &LandingPads, - SmallVectorImpl &Actions, - SmallVectorImpl &FirstActions); + void computeActionsTable( + const SmallVectorImpl &LandingPads, + SmallVectorImpl &Actions, + SmallVectorImpl &FirstActions); void computePadMap(const SmallVectorImpl &LandingPads, RangeMapType &PadMap); - /// Compute the call-site table. The entry for an invoke has a try-range - /// containing the call, a non-zero landing pad and an appropriate action. - /// The entry for an ordinary call has a try-range containing the call and - /// zero for the landing pad and the action. Calls marked 'nounwind' have - /// no entry and must not be contained in the try-range of any entry - they - /// form gaps in the table. Entries must be ordered by try-range address. + /// Compute the call-site table and the call-site ranges. The entry for an + /// invoke has a try-range containing the call, a non-zero landing pad and an + /// appropriate action. The entry for an ordinary call has a try-range + /// containing the call and zero for the landing pad and the action. Calls + /// marked 'nounwind' have no entry and must not be contained in the try-range + /// of any entry - they form gaps in the table. Entries must be ordered by + /// try-range address. CallSiteRanges vector is only populated for Itanium + /// exception handling. virtual void computeCallSiteTable( SmallVectorImpl &CallSites, + SmallVectorImpl &CallSiteRanges, const SmallVectorImpl &LandingPads, const SmallVectorImpl &FirstActions); diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index baef4d2cc8499..aa19a0f5e824d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -76,6 +76,7 @@ void WasmException::endFunction(const MachineFunction *MF) { // information. void WasmException::computeCallSiteTable( SmallVectorImpl &CallSites, + SmallVectorImpl &CallSiteRanges, const SmallVectorImpl &LandingPads, const SmallVectorImpl &FirstActions) { MachineFunction &MF = *Asm->MF; diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/llvm/lib/CodeGen/AsmPrinter/WasmException.h index 1893b6b2df43d..f06de786bd760 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WasmException.h +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.h @@ -32,6 +32,7 @@ class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer { // Compute the call site table for wasm EH. void computeCallSiteTable( SmallVectorImpl &CallSites, + SmallVectorImpl &CallSiteRanges, const SmallVectorImpl &LandingPads, const SmallVectorImpl &FirstActions) override; }; diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 9692e9b9f0914..adbe94a5dad86 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -293,6 +293,26 @@ void llvm::sortBasicBlocksAndUpdateBranches( updateBranches(MF, PreLayoutFallThroughs); } +// If the exception section begins with a landing pad, that landing pad will +// assume a zero offset (relative to @LPStart) in the LSDA. However, a value of +// zero implies "no landing pad." This function inserts a NOP just before the EH +// pad label to ensure a nonzero offset. Returns true if padding is not needed. +static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { + for (auto &MBB : MF) { + if (MBB.isBeginSection() && MBB.isEHPad()) { + MachineBasicBlock::iterator MI = MBB.begin(); + while (!MI->isEHLabel()) + ++MI; + MCInst Noop; + MF.getSubtarget().getInstrInfo()->getNoop(Noop); + BuildMI(MBB, MI, DebugLoc(), + MF.getSubtarget().getInstrInfo()->get(Noop.getOpcode())); + return false; + } + } + return true; +} + bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { auto BBSectionsType = MF.getTarget().getBBSectionsType(); assert(BBSectionsType != BasicBlockSection::None && @@ -354,6 +374,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { }; sortBasicBlocksAndUpdateBranches(MF, Comparator); + avoidZeroOffsetLandingPad(MF); return true; } diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index d6c1efa6327c8..180513a549520 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -903,7 +903,7 @@ void ARMAsmPrinter::emitMachineConstantPoolValue( MCSymbol *MCSym; if (ACPV->isLSDA()) { - MCSym = getCurExceptionSym(); + MCSym = getMBBExceptionSym(MF->front()); } else if (ACPV->isBlockAddress()) { const BlockAddress *BA = cast(ACPV)->getBlockAddress(); diff --git a/llvm/test/CodeGen/X86/gcc_except_table_bb_sections.ll b/llvm/test/CodeGen/X86/gcc_except_table_bb_sections.ll new file mode 100644 index 0000000000000..ce0cc55b54e37 --- /dev/null +++ b/llvm/test/CodeGen/X86/gcc_except_table_bb_sections.ll @@ -0,0 +1,166 @@ +; RUN: llc -basic-block-sections=all -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NON-PIC +; RUN: llc -basic-block-sections=all -mtriple x86_64-pc-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PIC +@_ZTIi = external constant i8* + +define i32 @main() uwtable optsize ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; Verify that each basic block section gets its own LSDA exception symbol. +; +; CHECK-LABEL: main: +; CHECK-NEXT: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc + +;; Verify personality function and LSDA encoding for NON-PIC mode. +; PersonalityEncoding = dwarf::DW_EH_PE_udata4 +; CHECK-NON-PIC-NEXT: .cfi_personality 3, __gxx_personality_v0 +; LSDAEncoding = dwarf::DW_EH_PE_udata4 +; CHECK-NON-PIC-NEXT: .cfi_lsda 3, .Lexception0 + +;; Verify personality function and LSDA encoding for PIC mode. +; PersonalityEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-PIC-NEXT: .cfi_personality 155, DW.ref.__gxx_personality_v0 +; LSDAEncoding = DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-PIC-NEXT: .cfi_lsda 27, .Lexception0 + +; CHECK-LABEL: .Ltmp0: +; CHECK-NEXT: callq _Z1fv +; CHECK-LABEL: .Ltmp1: + +; CHECK-NOT: .cfi_lsda + +; CHECK-LABEL: main.1: +; CHECK-NEXT: .cfi_startproc + +; CHECK-NON-PIC-NEXT: .cfi_personality 3, __gxx_personality_v0 +; CHECK-NON-PIC-NEXT: .cfi_lsda 3, .Lexception1 + +; CHECK-PIC-NEXT: .cfi_personality 155, DW.ref.__gxx_personality_v0 +; CHECK-PIC-NEXT: .cfi_lsda 27, .Lexception1 + +; CHECK-NOT: .cfi_lsda + +; CHECK-LABEL: main.2: +; CHECK-NEXT: .cfi_startproc + +; CHECK-NON-PIC-NEXT: .cfi_personality 3, __gxx_personality_v0 +; CHECK-NON-PIC-NEXT: .cfi_lsda 3, .Lexception2 + +; CHECK-PIC-NEXT: .cfi_personality 155, DW.ref.__gxx_personality_v0 +; CHECK-PIC-NEXT: .cfi_lsda 27, .Lexception2 + +; CHECK: nop +; CHECK-LABEL: .Ltmp2: +; CHECK-LABEL: .LBB_END0_2: + +; CHECK-NOT: .cfi_lsda + +entry: + invoke void @_Z1fv() optsize + to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { i8*, i32 } + cleanup + catch i8* bitcast (i8** @_ZTIi to i8*) + br label %eh.resume + +try.cont: + ret i32 0 + +eh.resume: + resume { i8*, i32 } %0 +} + +declare void @_Z1fv() optsize + +declare i32 @__gxx_personality_v0(...) +;; Verify that the exception table gets split across the three basic block sections. +; +; CHECK: .section .gcc_except_table +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: GCC_except_table0: +; CHECK-NEXT: .Lexception0: + +;; Verify @LPStart encoding for NON-PIC mode. +; CHECK-NON-PIC-NEXT: .byte 0 # @LPStart Encoding = absptr +; CHECK-NON-PIC-NEXT: .quad main.2 + +;; Verify @LPStart encoding for PIC mode. +; CHECK-PIC-NEXT: .byte 16 # @LPStart Encoding = pcrel +; CHECK-PIC-NEXT: [[DOT:\.Ltmp[0-9]+]]: +; CHECK-PIC-NEXT: .quad main.2-[[DOT]] + +;; Verify @TType encoding for NON-PIC mode. +; CHECK-NON-PIC-NEXT: .byte 3 # @TType Encoding = udata4 + +;; Verify @TType encoding for PIC mode. +; CHECK-PIC-NEXT: .byte 156 # @TType Encoding = indirect pcrel sdata8 + +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref0 +; CHECK-NEXT: .Lttbaseref0: +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin0 +; CHECK-NEXT: .Lcst_begin0: +; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; CHECK-NEXT: .uleb128 .Ltmp2-main.2 # jumps to .Ltmp2 +; CHECK-NEXT: .byte 3 # On action: 2 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .Lexception1: + +; CHECK-NON-PIC-NEXT: .byte 0 # @LPStart Encoding = absptr +; CHECK-NON-PIC-NEXT: .quad main.2 + +; CHECK-PIC-NEXT: .byte 16 # @LPStart Encoding = pcrel +; CHECK-PIC-NEXT: [[DOT:\.Ltmp[0-9]+]]: +; CHECK-PIC-NEXT: .quad main.2-[[DOT]] + +; CHECK-NON-PIC-NEXT: .byte 3 # @TType Encoding = udata4 + +; CHECK-PIC-NEXT: .byte 156 # @TType Encoding = indirect pcrel sdata8 + +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref1 +; CHECK-NEXT: .Lttbaseref1: +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin1 +; CHECK-NEXT: .Lcst_begin1: +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .Lexception2: + +; CHECK-NON-PIC-NEXT: .byte 0 # @LPStart Encoding = absptr +; CHECK-NON-PIC-NEXT: .quad main.2 + +; CHECK-PIC-NEXT: .byte 16 # @LPStart Encoding = pcrel +; CHECK-PIC-NEXT: [[DOT:\.Ltmp[0-9]+]]: +; CHECK-PIC-NEXT: .quad main.2-[[DOT]] + +; CHECK-NON-PIC-NEXT: .byte 3 # @TType Encoding = udata4 + +; CHECK-PIC-NEXT: .byte 156 # @TType Encoding = indirect pcrel sdata8 + +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref2 +; CHECK-NEXT: .Lttbaseref2: +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin2 +; CHECK-NEXT: .Lcst_begin2: +; CHECK-NEXT: .uleb128 main.2-main.2 # >> Call Site 2 << +; CHECK-NEXT: .uleb128 .LBB_END0_2-main.2 # Call between main.2 and .LBB_END0_2 +; CHECK-NEXT: .byte 0 # has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup +; CHECK-NEXT: .Laction_table_base0: +; CHECK-NEXT: .byte 0 # >> Action Record 1 << +; CHECK-NEXT: # Cleanup +; CHECK-NEXT: .byte 0 # No further actions +; CHECK-NEXT: .byte 1 # >> Action Record 2 << +; CHECK-NEXT: # Catch TypeInfo 1 +; CHECK-NEXT: .byte 125 # Continue to action 1 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # >> Catch TypeInfos << + +; CHECK-NON-PIC-NEXT: .long _ZTIi # TypeInfo 1 + +; CHECK-PIC-NEXT: [[DOT:\.Ltmp[0-9]+]]: +; CHECK-PIC-NEXT: .quad .L_ZTIi.DW.stub-[[DOT]] + +; CHECK-NEXT: .Lttbase0: +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # -- End function diff --git a/llvm/test/CodeGen/X86/gcc_except_table_bb_sections_ehpad_groups_with_cold.ll b/llvm/test/CodeGen/X86/gcc_except_table_bb_sections_ehpad_groups_with_cold.ll new file mode 100644 index 0000000000000..541335a176d43 --- /dev/null +++ b/llvm/test/CodeGen/X86/gcc_except_table_bb_sections_ehpad_groups_with_cold.ll @@ -0,0 +1,96 @@ +; Check that when all exception handling blocks are cold, they get grouped with the cold bbs. +; RUN: echo '!main' > %t +; RUN: echo '!!0' >> %t +; RUN: llc -function-sections -basic-block-sections=%t -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s +@_ZTIi = external constant i8* + +define i32 @main() uwtable optsize ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; Verify that each basic block section gets its own LSDA exception symbol. +; +; CHECK-LABEL: main: +; CHECK-NEXT: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; PersonalityEncoding = dwarf::DW_EH_PE_udata4 +; CHECK-NEXT: .cfi_personality 3, __gxx_personality_v0 +; LSDAEncoding = dwarf::DW_EH_PE_udata4 +; CHECK-NEXT: .cfi_lsda 3, .Lexception0 +; CHECK-LABEL: .Ltmp0: +; CHECK-LABEL: .Ltmp1: + +; CHECK-NOT: .cfi_lsda + +; CHECK-LABEL: main.cold: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 3, __gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 3, .Lexception1 +; CHECK-LABEL: .Ltmp2: +; CHECK-LABEL: .LBB_END0_2: + +; CHECK-NOT: .cfi_lsda + +entry: + invoke void @_Z1fv() optsize + to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { i8*, i32 } + cleanup + catch i8* bitcast (i8** @_ZTIi to i8*) + br label %eh.resume + +try.cont: + ret i32 0 + +eh.resume: + resume { i8*, i32 } %0 +} + +declare void @_Z1fv() optsize + +declare i32 @__gxx_personality_v0(...) + +; Verify that the exception table gets split across the two basic block sections. +; +; CHECK: .section .gcc_except_table +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: GCC_except_table0: +; CHECK-NEXT: .Lexception0: +; CHECK-NEXT: .byte 0 # @LPStart Encoding = absptr +; CHECK-NEXT: .quad main.cold +; CHECK-NEXT: .byte 3 # @TType Encoding = udata4 +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref0 +; CHECK-NEXT: .Lttbaseref0: +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin0 +; CHECK-NEXT: .Lcst_begin0: +; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; CHECK-NEXT: .uleb128 .Ltmp2-main.cold # jumps to .Ltmp2 +; CHECK-NEXT: .byte 3 # On action: 2 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .Lexception1: +; CHECK-NEXT: .byte 0 # @LPStart Encoding = absptr +; CHECK-NEXT: .quad main.cold +; CHECK-NEXT: .byte 3 # @TType Encoding = udata4 +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref1 +; CHECK-NEXT: .Lttbaseref1: +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin1 +; CHECK-NEXT: .Lcst_begin1: +; CHECK-NEXT: .uleb128 main.cold-main.cold # >> Call Site 2 << +; CHECK-NEXT: .uleb128 .LBB_END0_2-main.cold # Call between main.cold and .LBB_END0_2 +; CHECK-NEXT: .byte 0 # has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup +; CHECK-NEXT: .Laction_table_base0: +; CHECK-NEXT: .byte 0 # >> Action Record 1 << +; CHECK-NEXT: # Cleanup +; CHECK-NEXT: .byte 0 # No further actions +; CHECK-NEXT: .byte 1 # >> Action Record 2 << +; CHECK-NEXT: # Catch TypeInfo 1 +; CHECK-NEXT: .byte 125 # Continue to action 1 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # >> Catch TypeInfos << +; CHECK-NEXT: .long _ZTIi # TypeInfo 1 +; CHECK-NEXT: .Lttbase0: +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # -- End function From c3193e464cbd5e8b7cade103032c222bf8bc0e27 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 30 Sep 2020 10:47:48 -0700 Subject: [PATCH 191/544] [lldb/ipv6] Support running lldb tests in an ipv6-only environment. When running in an ipv6-only environment where `AF_INET` sockets are not available, many lldb tests (mostly gdb remote tests) fail because things like `127.0.0.1` don't work there. Use `localhost` instead of `127.0.0.1` whenever possible, or include a fallback of creating `AF_INET6` sockets when `AF_INET` fails. Reviewed By: labath Differential Revision: https://reviews.llvm.org/D87333 --- .../tools/lldb-server/gdbremote_testcase.py | 10 ++- .../gdb-remote/GDBRemoteCommunication.cpp | 4 +- .../gdb_remote_client/gdbclientutils.py | 13 +++- .../commandline/TestStubReverseConnect.py | 14 +++- lldb/tools/lldb-server/lldb-gdbserver.cpp | 3 +- lldb/unittests/Host/SocketTest.cpp | 69 ++++++++++++++----- lldb/unittests/Host/SocketTestUtilities.cpp | 11 +-- 7 files changed, 93 insertions(+), 31 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py index 253fd35d461e3..7d7b61c8610d9 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py @@ -318,7 +318,13 @@ def _verify_socket(self, sock): raise _ConnectionRefused() # Got EOF, connection dropped. def create_socket(self): - sock = socket.socket() + try: + sock = socket.socket(family=socket.AF_INET) + except OSError as e: + if e.errno != errno.EAFNOSUPPORT: + raise + sock = socket.socket(family=socket.AF_INET6) + logger = self.logger triple = self.dbg.GetSelectedPlatform().GetTriple() @@ -379,7 +385,7 @@ def get_debug_monitor_command_line_args(self, attach_pid=None): ["*:{}".format(self.port)] else: commandline_args = self.debug_monitor_extra_args + \ - ["127.0.0.1:{}".format(self.port)] + ["localhost:{}".format(self.port)] if attach_pid: commandline_args += ["--attach=%d" % attach_pid] diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp index 832760f7f0dcc..4981345d6a181 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp @@ -1234,7 +1234,7 @@ GDBRemoteCommunication::ConnectLocally(GDBRemoteCommunication &client, const int backlog = 5; TCPSocket listen_socket(true, child_processes_inherit); if (llvm::Error error = - listen_socket.Listen("127.0.0.1:0", backlog).ToError()) + listen_socket.Listen("localhost:0", backlog).ToError()) return error; Socket *accept_socket; @@ -1243,7 +1243,7 @@ GDBRemoteCommunication::ConnectLocally(GDBRemoteCommunication &client, llvm::SmallString<32> remote_addr; llvm::raw_svector_ostream(remote_addr) - << "connect://127.0.0.1:" << listen_socket.GetLocalPortNumber(); + << "connect://localhost:" << listen_socket.GetLocalPortNumber(); std::unique_ptr conn_up( new ConnectionFileDescriptor()); diff --git a/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py b/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py index eb789e861d9c3..dabe9423434d7 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py +++ b/lldb/test/API/functionalities/gdb_remote_client/gdbclientutils.py @@ -1,3 +1,4 @@ +import errno import os import os.path import threading @@ -317,12 +318,20 @@ class MockGDBServer: def __init__(self, port = 0): self.responder = MockGDBServerResponder() self.port = port - self._socket = socket.socket() + try: + self._socket = socket.socket(family=socket.AF_INET) + except OSError as e: + if e.errno != errno.EAFNOSUPPORT: + raise + self._socket = socket.socket(family=socket.AF_INET6) def start(self): # Block until the socket is up, so self.port is available immediately. # Then start a thread that waits for a client connection. - addr = ("127.0.0.1", self.port) + if self._socket.family == socket.AF_INET: + addr = ("127.0.0.1", self.port) + elif self._socket.family == socket.AF_INET6: + addr = ("::1", self.port) self._socket.bind(addr) self.port = self._socket.getsockname()[1] self._socket.listen(1) diff --git a/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py b/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py index a3250ab4f1bfb..4306f8dcc22d2 100644 --- a/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py +++ b/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py @@ -1,5 +1,6 @@ from __future__ import print_function +import errno import gdbremote_testcase import lldbgdbserverutils import re @@ -24,11 +25,20 @@ def setUp(self): self.listener_port = self.listener_socket.getsockname()[1] def create_listener_socket(self): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + except OSError as e: + if e.errno != errno.EAFNOSUPPORT: + raise + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) self.assertIsNotNone(sock) sock.settimeout(self.DEFAULT_TIMEOUT) - sock.bind(("127.0.0.1", 0)) + if sock.family == socket.AF_INET: + bind_addr = ("127.0.0.1", 0) + elif sock.family == socket.AF_INET6: + bind_addr = ("::1", 0) + sock.bind(bind_addr) sock.listen(1) def tear_down_listener(): diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp index 5f06503d64244..7f53756424c67 100644 --- a/lldb/tools/lldb-server/lldb-gdbserver.cpp +++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp @@ -267,7 +267,8 @@ void ConnectToRemote(MainLoop &mainloop, final_host_and_port.append("localhost"); final_host_and_port.append(host_and_port); - const std::string::size_type colon_pos = final_host_and_port.find(':'); + // Note: use rfind, because the host/port may look like "[::1]:12345". + const std::string::size_type colon_pos = final_host_and_port.rfind(':'); if (colon_pos != std::string::npos) { connection_host = final_host_and_port.substr(0, colon_pos); connection_port = final_host_and_port.substr(colon_pos + 1); diff --git a/lldb/unittests/Host/SocketTest.cpp b/lldb/unittests/Host/SocketTest.cpp index c53d2660f0c81..901f878d2e466 100644 --- a/lldb/unittests/Host/SocketTest.cpp +++ b/lldb/unittests/Host/SocketTest.cpp @@ -14,12 +14,24 @@ using namespace lldb_private; -class SocketTest : public testing::Test { +struct SocketTestParams { + bool is_ipv6; + std::string localhost_ip; +}; + +class SocketTest : public testing::TestWithParam { public: SubsystemRAII subsystems; + +protected: + bool HostSupportsProtocol() const { + if (GetParam().is_ipv6) + return HostSupportsIPv6(); + return HostSupportsIPv4(); + } }; -TEST_F(SocketTest, DecodeHostAndPort) { +TEST_P(SocketTest, DecodeHostAndPort) { std::string host_str; std::string port_str; int32_t port; @@ -86,7 +98,7 @@ TEST_F(SocketTest, DecodeHostAndPort) { } #if LLDB_ENABLE_POSIX -TEST_F(SocketTest, DomainListenConnectAccept) { +TEST_P(SocketTest, DomainListenConnectAccept) { llvm::SmallString<64> Path; std::error_code EC = llvm::sys::fs::createUniqueDirectory("DomainListenConnectAccept", Path); ASSERT_FALSE(EC); @@ -102,18 +114,22 @@ TEST_F(SocketTest, DomainListenConnectAccept) { } #endif -TEST_F(SocketTest, TCPListen0ConnectAccept) { +TEST_P(SocketTest, TCPListen0ConnectAccept) { + if (!HostSupportsProtocol()) + return; std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; - CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up); + CreateTCPConnectedSockets(GetParam().localhost_ip, &socket_a_up, + &socket_b_up); } -TEST_F(SocketTest, TCPGetAddress) { +TEST_P(SocketTest, TCPGetAddress) { std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; - if (!HostSupportsIPv4()) + if (!HostSupportsProtocol()) return; - CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up); + CreateTCPConnectedSockets(GetParam().localhost_ip, &socket_a_up, + &socket_b_up); EXPECT_EQ(socket_a_up->GetLocalPortNumber(), socket_b_up->GetRemotePortNumber()); @@ -121,11 +137,16 @@ TEST_F(SocketTest, TCPGetAddress) { socket_a_up->GetRemotePortNumber()); EXPECT_NE(socket_a_up->GetLocalPortNumber(), socket_b_up->GetLocalPortNumber()); - EXPECT_STREQ("127.0.0.1", socket_a_up->GetRemoteIPAddress().c_str()); - EXPECT_STREQ("127.0.0.1", socket_b_up->GetRemoteIPAddress().c_str()); + EXPECT_STREQ(GetParam().localhost_ip.c_str(), + socket_a_up->GetRemoteIPAddress().c_str()); + EXPECT_STREQ(GetParam().localhost_ip.c_str(), + socket_b_up->GetRemoteIPAddress().c_str()); } -TEST_F(SocketTest, UDPConnect) { +TEST_P(SocketTest, UDPConnect) { + // UDPSocket::Connect() creates sockets with AF_INET (IPv4). + if (!HostSupportsIPv4()) + return; llvm::Expected> socket = UDPSocket::Connect("127.0.0.1:0", /*child_processes_inherit=*/false); @@ -133,7 +154,9 @@ TEST_F(SocketTest, UDPConnect) { EXPECT_TRUE(socket.get()->IsValid()); } -TEST_F(SocketTest, TCPListen0GetPort) { +TEST_P(SocketTest, TCPListen0GetPort) { + if (!HostSupportsIPv4()) + return; Predicate port_predicate; port_predicate.SetValue(0, eBroadcastNever); llvm::Expected> sock = @@ -143,12 +166,13 @@ TEST_F(SocketTest, TCPListen0GetPort) { EXPECT_NE(sock.get()->GetLocalPortNumber(), 0); } -TEST_F(SocketTest, TCPGetConnectURI) { +TEST_P(SocketTest, TCPGetConnectURI) { std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; - if (!HostSupportsIPv4()) + if (!HostSupportsProtocol()) return; - CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up); + CreateTCPConnectedSockets(GetParam().localhost_ip, &socket_a_up, + &socket_b_up); llvm::StringRef scheme; llvm::StringRef hostname; @@ -160,7 +184,8 @@ TEST_F(SocketTest, TCPGetConnectURI) { EXPECT_EQ(port, socket_a_up->GetRemotePortNumber()); } -TEST_F(SocketTest, UDPGetConnectURI) { +TEST_P(SocketTest, UDPGetConnectURI) { + // UDPSocket::Connect() creates sockets with AF_INET (IPv4). if (!HostSupportsIPv4()) return; llvm::Expected> socket = @@ -177,7 +202,7 @@ TEST_F(SocketTest, UDPGetConnectURI) { } #if LLDB_ENABLE_POSIX -TEST_F(SocketTest, DomainGetConnectURI) { +TEST_P(SocketTest, DomainGetConnectURI) { llvm::SmallString<64> domain_path; std::error_code EC = llvm::sys::fs::createUniqueDirectory("DomainListenConnectAccept", domain_path); @@ -202,3 +227,13 @@ TEST_F(SocketTest, DomainGetConnectURI) { EXPECT_EQ(path, domain_path); } #endif + +INSTANTIATE_TEST_CASE_P( + SocketTests, SocketTest, + testing::Values(SocketTestParams{/*is_ipv6=*/false, + /*localhost_ip=*/"127.0.0.1"}, + SocketTestParams{/*is_ipv6=*/true, /*localhost_ip=*/"::1"}), + // Prints "SocketTests/SocketTest.DecodeHostAndPort/ipv4" etc. in test logs. + [](const testing::TestParamInfo &info) { + return info.param.is_ipv6 ? "ipv6" : "ipv4"; + }); diff --git a/lldb/unittests/Host/SocketTestUtilities.cpp b/lldb/unittests/Host/SocketTestUtilities.cpp index e2006b85115db..3b52a66a09eb3 100644 --- a/lldb/unittests/Host/SocketTestUtilities.cpp +++ b/lldb/unittests/Host/SocketTestUtilities.cpp @@ -101,13 +101,14 @@ static bool CheckIPSupport(llvm::StringRef Proto, llvm::StringRef Addr) { "Creating a canary {0} TCP socket failed: {1}.", Proto, Err) .str(); - bool HasAddrNotAvail = false; + bool HasProtocolError = false; handleAllErrors(std::move(Err), [&](std::unique_ptr ECErr) { - if (ECErr->convertToErrorCode() == - std::make_error_code(std::errc::address_not_available)) - HasAddrNotAvail = true; + std::error_code ec = ECErr->convertToErrorCode(); + if (ec == std::make_error_code(std::errc::address_family_not_supported) || + ec == std::make_error_code(std::errc::address_not_available)) + HasProtocolError = true; }); - if (HasAddrNotAvail) { + if (HasProtocolError) { GTEST_LOG_(WARNING) << llvm::formatv( "Assuming the host does not support {0}. Skipping test.", Proto) From 655af658c93bf7f133341e7eb5a2dfa176282781 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 29 Sep 2020 13:55:33 -0700 Subject: [PATCH 192/544] [MLIR] Add async.value type to Async dialect Return values from async regions as !async.value<...>. Reviewed By: mehdi_amini, csigg Differential Revision: https://reviews.llvm.org/D88510 --- mlir/include/mlir/Dialect/Async/IR/Async.h | 16 +++ .../mlir/Dialect/Async/IR/AsyncBase.td | 20 +++ .../include/mlir/Dialect/Async/IR/AsyncOps.td | 20 +-- mlir/lib/Dialect/Async/IR/Async.cpp | 120 +++++++++++++++++- mlir/test/Dialect/Async/ops.mlir | 38 +++++- 5 files changed, 198 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/Dialect/Async/IR/Async.h b/mlir/include/mlir/Dialect/Async/IR/Async.h index f61d07b7d0dfd..b1cf25ecea57e 100644 --- a/mlir/include/mlir/Dialect/Async/IR/Async.h +++ b/mlir/include/mlir/Dialect/Async/IR/Async.h @@ -22,12 +22,28 @@ namespace mlir { namespace async { +namespace detail { +struct ValueTypeStorage; +} // namespace detail + /// The token type to represent asynchronous operation completion. class TokenType : public Type::TypeBase { public: using Base::Base; }; +/// The value type to represent values returned from asynchronous operations. +class ValueType + : public Type::TypeBase { +public: + using Base::Base; + + /// Get or create an async ValueType with the provided value type. + static ValueType get(Type valueType); + + Type getValueType(); +}; + } // namespace async } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td b/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td index ac67e9f1609d7..2097f05747dda 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncBase.td @@ -39,4 +39,24 @@ def Async_TokenType : DialectType + : DialectType()">, + SubstLeaves<"$_self", + "$_self.cast<::mlir::async::ValueType>().getValueType()", + type.predicate> + ]>, "async value type with " # type.description # " underlying type"> { + let typeDescription = [{ + `async.value` represents a value returned by asynchronous operations, + which may or may not be available currently, but will be available at some + point in the future. + }]; + + Type valueType = type; +} + +def Async_AnyValueType : Type()">, + "async value type">; + #endif // ASYNC_BASE_TD diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td index b84f7c4028016..2dcc9a8f86fd7 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td @@ -40,24 +40,24 @@ def Async_ExecuteOp : Async_Op<"execute"> { state). All dependencies must be made explicit with async execute arguments (`async.token` or `async.value`). - Example: - ```mlir - %0 = async.execute { - "compute0"(...) - async.yield - } : !async.token + %done, %values = async.execute { + %0 = "compute0"(...) : !some.type + async.yield %1 : f32 + } : !async.token, !async.value - %1 = "compute1"(...) + %1 = "compute1"(...) : !some.type ``` }]; // TODO: Take async.tokens/async.values as arguments. let arguments = (ins ); - let results = (outs Async_TokenType:$done); + let results = (outs Async_TokenType:$done, + Variadic:$values); let regions = (region SizedRegion<1>:$body); - let assemblyFormat = "$body attr-dict `:` type($done)"; + let printer = [{ return ::mlir::async::print(p, *this); }]; + let parser = [{ return ::mlir::async::parse$cppClass(parser, result); }]; } def Async_YieldOp : @@ -71,6 +71,8 @@ def Async_YieldOp : let arguments = (ins Variadic:$operands); let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; + + let verifier = [{ return ::mlir::async::verify(*this); }]; } #endif // ASYNC_OPS diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index 61057870d3013..4d9ede13f19cd 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/raw_ostream.h" -using namespace mlir; -using namespace mlir::async; +namespace mlir { +namespace async { void AsyncDialect::initialize() { addOperations< @@ -28,6 +28,7 @@ void AsyncDialect::initialize() { #include "mlir/Dialect/Async/IR/AsyncOps.cpp.inc" >(); addTypes(); + addTypes(); } /// Parse a type registered to this dialect. @@ -39,6 +40,15 @@ Type AsyncDialect::parseType(DialectAsmParser &parser) const { if (keyword == "token") return TokenType::get(getContext()); + if (keyword == "value") { + Type ty; + if (parser.parseLess() || parser.parseType(ty) || parser.parseGreater()) { + parser.emitError(parser.getNameLoc(), "failed to parse async value type"); + return Type(); + } + return ValueType::get(ty); + } + parser.emitError(parser.getNameLoc(), "unknown async type: ") << keyword; return Type(); } @@ -46,9 +56,113 @@ Type AsyncDialect::parseType(DialectAsmParser &parser) const { /// Print a type registered to this dialect. void AsyncDialect::printType(Type type, DialectAsmPrinter &os) const { TypeSwitch(type) - .Case([&](Type) { os << "token"; }) + .Case([&](TokenType) { os << "token"; }) + .Case([&](ValueType valueTy) { + os << "value<"; + os.printType(valueTy.getValueType()); + os << '>'; + }) .Default([](Type) { llvm_unreachable("unexpected 'async' type kind"); }); } +//===----------------------------------------------------------------------===// +/// ValueType +//===----------------------------------------------------------------------===// + +namespace detail { + +// Storage for `async.value` type, the only member is the wrapped type. +struct ValueTypeStorage : public TypeStorage { + ValueTypeStorage(Type valueType) : valueType(valueType) {} + + /// The hash key used for uniquing. + using KeyTy = Type; + bool operator==(const KeyTy &key) const { return key == valueType; } + + /// Construction. + static ValueTypeStorage *construct(TypeStorageAllocator &allocator, + Type valueType) { + return new (allocator.allocate()) + ValueTypeStorage(valueType); + } + + Type valueType; +}; + +} // namespace detail + +ValueType ValueType::get(Type valueType) { + return Base::get(valueType.getContext(), valueType); +} + +Type ValueType::getValueType() { return getImpl()->valueType; } + +//===----------------------------------------------------------------------===// +// YieldOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(YieldOp op) { + // Get the underlying value types from async values returned from the + // parent `async.execute` operation. + auto executeOp = op.getParentOfType(); + auto types = llvm::map_range(executeOp.values(), [](const OpResult &result) { + return result.getType().cast().getValueType(); + }); + + if (!std::equal(types.begin(), types.end(), op.getOperandTypes().begin())) + return op.emitOpError("Operand types do not match the types returned from " + "the parent ExecuteOp"); + + return success(); +} + +//===----------------------------------------------------------------------===// +/// ExecuteOp +//===----------------------------------------------------------------------===// + +static void print(OpAsmPrinter &p, ExecuteOp op) { + p << "async.execute "; + p.printRegion(op.body()); + p.printOptionalAttrDict(op.getAttrs()); + p << " : "; + p.printType(op.done().getType()); + if (!op.values().empty()) + p << ", "; + llvm::interleaveComma(op.values(), p, [&](const OpResult &result) { + p.printType(result.getType()); + }); +} + +static ParseResult parseExecuteOp(OpAsmParser &parser, OperationState &result) { + MLIRContext *ctx = result.getContext(); + + // Parse asynchronous region. + Region *body = result.addRegion(); + if (parser.parseRegion(*body, /*arguments=*/{}, /*argTypes=*/{}, + /*enableNameShadowing=*/false)) + return failure(); + + // Parse operation attributes. + NamedAttrList attrs; + if (parser.parseOptionalAttrDict(attrs)) + return failure(); + result.addAttributes(attrs); + + // Parse result types. + SmallVector resultTypes; + if (parser.parseColonTypeList(resultTypes)) + return failure(); + + // First result type must be an async token type. + if (resultTypes.empty() || resultTypes.front() != TokenType::get(ctx)) + return failure(); + parser.addTypesToList(resultTypes, result.types); + + return success(); +} + +} // namespace async +} // namespace mlir + #define GET_OP_CLASSES #include "mlir/Dialect/Async/IR/AsyncOps.cpp.inc" diff --git a/mlir/test/Dialect/Async/ops.mlir b/mlir/test/Dialect/Async/ops.mlir index 2f5d0123e2157..d23bc003dd3a5 100644 --- a/mlir/test/Dialect/Async/ops.mlir +++ b/mlir/test/Dialect/Async/ops.mlir @@ -1,16 +1,46 @@ // RUN: mlir-opt %s | FileCheck %s -// CHECK-LABEL: @identity -func @identity(%arg0 : !async.token) -> !async.token { +// CHECK-LABEL: @identity_token +func @identity_token(%arg0 : !async.token) -> !async.token { // CHECK: return %arg0 : !async.token return %arg0 : !async.token } +// CHECK-LABEL: @identity_value +func @identity_value(%arg0 : !async.value) -> !async.value { + // CHECK: return %arg0 : !async.value + return %arg0 : !async.value +} + // CHECK-LABEL: @empty_async_execute func @empty_async_execute() -> !async.token { - %0 = async.execute { + %done = async.execute { async.yield } : !async.token - return %0 : !async.token + // CHECK: return %done : !async.token + return %done : !async.token +} + +// CHECK-LABEL: @return_async_value +func @return_async_value() -> !async.value { + %done, %values = async.execute { + %cst = constant 1.000000e+00 : f32 + async.yield %cst : f32 + } : !async.token, !async.value + + // CHECK: return %values : !async.value + return %values : !async.value +} + +// CHECK-LABEL: @return_async_values +func @return_async_values() -> (!async.value, !async.value) { + %done, %values:2 = async.execute { + %cst1 = constant 1.000000e+00 : f32 + %cst2 = constant 2.000000e+00 : f32 + async.yield %cst1, %cst2 : f32, f32 + } : !async.token, !async.value, !async.value + + // CHECK: return %values#0, %values#1 : !async.value, !async.value + return %values#0, %values#1 : !async.value, !async.value } From ad865d9d10b8cf93738470175aae1be7a4a3eb6b Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 30 Sep 2020 11:24:10 -0700 Subject: [PATCH 193/544] [lldb-vscode] Allow an empty 'breakpoints' field to clear breakpoints. Per the DAP spec for SetBreakpoints [1], the way to clear breakpoints is: `To clear all breakpoint for a source, specify an empty array.` However, leaving the breakpoints field unset is also a well formed request (note the `breakpoints?:` in the `SetBreakpointsArguments` definition). If it's unset, we have a couple choices: 1. Crash (current behavior) 2. Clear breakpoints 3. Return an error response that the breakpoints field is missing. I propose we do (2) instead of (1), and treat an unset breakpoints field the same as an empty breakpoints field. [1] https://microsoft.github.io/debug-adapter-protocol/specification#Requests_SetBreakpoints Reviewed By: wallace, labath Differential Revision: https://reviews.llvm.org/D88513 --- .../test/tools/lldb-vscode/vscode.py | 22 +++++----- .../breakpoint/TestVSCode_setBreakpoints.py | 42 ++++++++++++++++++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 43 +++++++++++-------- 3 files changed, 78 insertions(+), 29 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 834e33ef5c3da..70f29cdd3d756 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -728,24 +728,26 @@ def request_scopes(self, frameId): def request_setBreakpoints(self, file_path, line_array, condition=None, hitCondition=None): (dir, base) = os.path.split(file_path) - breakpoints = [] - for line in line_array: - bp = {'line': line} - if condition is not None: - bp['condition'] = condition - if hitCondition is not None: - bp['hitCondition'] = hitCondition - breakpoints.append(bp) source_dict = { 'name': base, 'path': file_path } args_dict = { 'source': source_dict, - 'breakpoints': breakpoints, - 'lines': '%s' % (line_array), 'sourceModified': False, } + if line_array is not None: + args_dict['lines'] = '%s' % line_array + breakpoints = [] + for line in line_array: + bp = {'line': line} + if condition is not None: + bp['condition'] = condition + if hitCondition is not None: + bp['hitCondition'] = hitCondition + breakpoints.append(bp) + args_dict['breakpoints'] = breakpoints + command_dict = { 'command': 'setBreakpoints', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py b/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py index c08c4d70489f9..23f4ad216ea2f 100644 --- a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py +++ b/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py @@ -219,6 +219,48 @@ def test_set_and_clear(self): self.assertTrue(breakpoint['verified'], "expect breakpoint still verified") + @skipIfWindows + @skipIfRemote + def test_clear_breakpoints_unset_breakpoints(self): + '''Test clearing breakpoints like test_set_and_clear, but clear + breakpoints by omitting the breakpoints array instead of sending an + empty one.''' + lines = [line_number('main.cpp', 'break 12'), + line_number('main.cpp', 'break 13')] + + # Visual Studio Code Debug Adaptors have no way to specify the file + # without launching or attaching to a process, so we must start a + # process in order to be able to set breakpoints. + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + + # Set one breakpoint and verify that it got set correctly. + response = self.vscode.request_setBreakpoints(self.main_path, lines) + line_to_id = {} + breakpoints = response['body']['breakpoints'] + self.assertEquals(len(breakpoints), len(lines), + "expect %u source breakpoints" % (len(lines))) + for (breakpoint, index) in zip(breakpoints, range(len(lines))): + line = breakpoint['line'] + self.assertTrue(line, lines[index]) + # Store the "id" of the breakpoint that was set for later + line_to_id[line] = breakpoint['id'] + self.assertTrue(line in lines, "line expected in lines array") + self.assertTrue(breakpoint['verified'], + "expect breakpoint verified") + + # Now clear all breakpoints for the source file by not setting the + # lines array. + lines = None + response = self.vscode.request_setBreakpoints(self.main_path, lines) + breakpoints = response['body']['breakpoints'] + self.assertEquals(len(breakpoints), 0, "expect no source breakpoints") + + # Verify with the target that all breakpoints have been cleared. + response = self.vscode.request_testGetTargetBreakpoints() + breakpoints = response['body']['breakpoints'] + self.assertEquals(len(breakpoints), 0, "expect no source breakpoints") + @skipIfWindows @skipIfRemote def test_functionality(self): diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 3b0817c71e62f..b64829423e301 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -1936,27 +1936,32 @@ void request_setBreakpoints(const llvm::json::Object &request) { // Decode the source breakpoint infos for this "setBreakpoints" request SourceBreakpointMap request_bps; - for (const auto &bp : *breakpoints) { - auto bp_obj = bp.getAsObject(); - if (bp_obj) { - SourceBreakpoint src_bp(*bp_obj); - request_bps[src_bp.line] = src_bp; - - // We check if this breakpoint already exists to update it - auto existing_source_bps = g_vsc.source_breakpoints.find(path); - if (existing_source_bps != g_vsc.source_breakpoints.end()) { - const auto &existing_bp = existing_source_bps->second.find(src_bp.line); - if (existing_bp != existing_source_bps->second.end()) { - existing_bp->second.UpdateBreakpoint(src_bp); - AppendBreakpoint(existing_bp->second.bp, response_breakpoints, path, - src_bp.line); - continue; + // "breakpoints" may be unset, in which case we treat it the same as being set + // to an empty array. + if (breakpoints) { + for (const auto &bp : *breakpoints) { + auto bp_obj = bp.getAsObject(); + if (bp_obj) { + SourceBreakpoint src_bp(*bp_obj); + request_bps[src_bp.line] = src_bp; + + // We check if this breakpoint already exists to update it + auto existing_source_bps = g_vsc.source_breakpoints.find(path); + if (existing_source_bps != g_vsc.source_breakpoints.end()) { + const auto &existing_bp = + existing_source_bps->second.find(src_bp.line); + if (existing_bp != existing_source_bps->second.end()) { + existing_bp->second.UpdateBreakpoint(src_bp); + AppendBreakpoint(existing_bp->second.bp, response_breakpoints, path, + src_bp.line); + continue; + } } + // At this point the breakpoint is new + src_bp.SetBreakpoint(path.data()); + AppendBreakpoint(src_bp.bp, response_breakpoints, path, src_bp.line); + g_vsc.source_breakpoints[path][src_bp.line] = std::move(src_bp); } - // At this point the breakpoint is new - src_bp.SetBreakpoint(path.data()); - AppendBreakpoint(src_bp.bp, response_breakpoints, path, src_bp.line); - g_vsc.source_breakpoints[path][src_bp.line] = std::move(src_bp); } } From afaeb6af79a4278249ef9114755e5685d0b35984 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Wed, 30 Sep 2020 11:46:59 -0700 Subject: [PATCH 194/544] Fix crash in SBStructuredData::GetDescription() when there's no StructuredDataPlugin. Also, use the StructuredData::Dump method to print the StructuredData if there is no plugin, rather than just returning an error. Differential Revision: https://reviews.llvm.org/D88266 --- lldb/include/lldb/Core/StructuredDataImpl.h | 14 +++++++++----- .../sbstructureddata/TestStructuredDataAPI.py | 7 +++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lldb/include/lldb/Core/StructuredDataImpl.h b/lldb/include/lldb/Core/StructuredDataImpl.h index 9aea645a3ea6a..929ce21fb2f92 100644 --- a/lldb/include/lldb/Core/StructuredDataImpl.h +++ b/lldb/include/lldb/Core/StructuredDataImpl.h @@ -68,14 +68,18 @@ class StructuredDataImpl { return error; } - // Grab the plugin. - auto plugin_sp = lldb::StructuredDataPluginSP(m_plugin_wp); + // Grab the plugin + lldb::StructuredDataPluginSP plugin_sp = m_plugin_wp.lock(); + + // If there's no plugin, call underlying data's dump method: if (!plugin_sp) { - error.SetErrorString("Cannot pretty print structured data: " - "plugin doesn't exist."); + if (!m_data_sp) { + error.SetErrorString("No data to describe."); + return error; + } + m_data_sp->Dump(stream, true); return error; } - // Get the data's description. return plugin_sp->GetDescription(m_data_sp, stream); } diff --git a/lldb/test/API/python_api/sbstructureddata/TestStructuredDataAPI.py b/lldb/test/API/python_api/sbstructureddata/TestStructuredDataAPI.py index f5efdfa8b37f3..a1a318517bed4 100644 --- a/lldb/test/API/python_api/sbstructureddata/TestStructuredDataAPI.py +++ b/lldb/test/API/python_api/sbstructureddata/TestStructuredDataAPI.py @@ -35,6 +35,13 @@ def structured_data_api_test(self): # Tests for invalid data type self.invalid_struct_test(example) + # Test that GetDescription works: + s.Clear() + error = example.GetDescription(s) + self.assertTrue(error.Success(), "GetDescription works") + if not "key_float" in s.GetData(): + self.fail("FAILED: could not find key_float in description output") + dict_struct = lldb.SBStructuredData() dict_struct = example.GetValueForKey("key_dict") From 2d761a368c3637cb6a6b05eb10ac8d839efe77cc Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 30 Sep 2020 11:49:58 -0700 Subject: [PATCH 195/544] [test][NewPM][SampleProfile] Fix more tests under NPM These all have separate legacy and new PM RUN lines. --- llvm/test/Transforms/SampleProfile/flattened.ll | 6 +++--- llvm/test/Transforms/SampleProfile/inline-mergeprof.ll | 4 ++-- .../SampleProfile/profile-sample-accurate.ll | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/Transforms/SampleProfile/flattened.ll b/llvm/test/Transforms/SampleProfile/flattened.ll index 7a1e53b966c7c..614e1bc5ed398 100644 --- a/llvm/test/Transforms/SampleProfile/flattened.ll +++ b/llvm/test/Transforms/SampleProfile/flattened.ll @@ -1,13 +1,13 @@ ; Check flattened profile will not be read in thinlto postlink. -; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -perform-thinlto=true -S | FileCheck %s +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -perform-thinlto=true -enable-new-pm=0 -S | FileCheck %s ; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s ; ; Check flattened profile will be read in thinlto prelink. -; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=true -S | FileCheck %s --check-prefix=PRELINK +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=true -enable-new-pm=0 -S | FileCheck %s --check-prefix=PRELINK ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=PRELINK ; ; Check flattened profile will be read in non-thinlto mode. -; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -S | FileCheck %s --check-prefix=NOTHINLTO +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -enable-new-pm=0 -S | FileCheck %s --check-prefix=NOTHINLTO ; RUN: opt < %s -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=NOTHINLTO ; ; CHECK-NOT: !{!"ProfileFormat", !"SampleProfile"} diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll index dfd1f1f20209c..01d1ecb78a5e6 100644 --- a/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll +++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll @@ -1,6 +1,6 @@ ; Test we lose details of not inlined profile without '-sample-profile-merge-inlinee' -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -S | FileCheck -check-prefix=SCALE %s -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck -check-prefix=SCALE %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -enable-new-pm=0 -S | FileCheck -check-prefix=SCALE %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -S | FileCheck -check-prefix=SCALE %s ; Test we properly merge not inlined profile with '-sample-profile-merge-inlinee' diff --git a/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll b/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll index 0ec187c3bb862..dfd8fd9577f78 100644 --- a/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll +++ b/llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll @@ -1,17 +1,17 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -enable-new-pm=0 -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -enable-new-pm=0 -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT ; RUN: llvm-profdata merge -sample -extbinary -prof-sym-list=%S/Inputs/profile-symbol-list.text %S/Inputs/profsampleacc.extbinary.afdo -o %t.symlist.afdo -; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST +; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -enable-new-pm=0 -S | FileCheck %s --check-prefix=PROFSYMLIST ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST ; ; If -profile-accurate-for-symsinlist and -profile-sample-accurate both present, ; -profile-sample-accurate will override -profile-accurate-for-symsinlist. -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -enable-new-pm=0 -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -enable-new-pm=0 -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT ; ; Original C++ test case From 490b556a0f3c9daddd05651d945662b93b3b13b9 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 30 Sep 2020 14:58:17 -0400 Subject: [PATCH 196/544] [libc++] Make sure we don't attempt to run check-cxx-abilist when libc++ doesn't define new/delete That would make the test fail spuriously because we don't generate an ABI list for that configuration. --- libcxx/lib/abi/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/lib/abi/CMakeLists.txt b/libcxx/lib/abi/CMakeLists.txt index 0fa3aacfb0e2b..cf7457fe8123d 100644 --- a/libcxx/lib/abi/CMakeLists.txt +++ b/libcxx/lib/abi/CMakeLists.txt @@ -22,7 +22,8 @@ if (EXISTS "${ABILIST_FILE}" AND ("${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "libcxxabi" OR (APPLE AND "${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "default")) AND NOT LIBCXX_ABI_UNSTABLE - AND LIBCXX_ENABLE_EXCEPTIONS) + AND LIBCXX_ENABLE_EXCEPTIONS + AND LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS) add_custom_target(check-cxx-abilist ${SYMDIFF_EXE} --only-stdlib-symbols --strict ${ABILIST_FILE} $ From bdc85292fb0f2a3965c8c65f9461d285b04841ed Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 30 Sep 2020 15:02:16 -0400 Subject: [PATCH 197/544] Revert "[OpenMP] Add Error Handling for Conflicting Pointer Sizes for Target Offload" Failing tests on Arm due to the tests automatically populating incomatible pointer width architectures. Reverting until the tests are updated. Failing tests: OpenMP/distribute_parallel_for_num_threads_codegen.cpp OpenMP/distribute_parallel_for_if_codegen.cpp OpenMP/distribute_parallel_for_simd_if_codegen.cpp OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp OpenMP/teams_distribute_parallel_for_if_codegen.cpp OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp This reverts commit 9d2378b59150f6f1cb5c9cf42ea06b0bb57029a1. --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 1 - clang/lib/Frontend/CompilerInvocation.cpp | 8 -------- ...get_parallel_reduction_codegen_tbaa_PR46146.cpp | 4 ++-- .../target_incompatible_architecture_messages.cpp | 14 -------------- 4 files changed, 2 insertions(+), 25 deletions(-) delete mode 100644 clang/test/OpenMP/target_incompatible_architecture_messages.cpp diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 29bc19e5a84e5..3bf1bb19b7ae3 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -253,7 +253,6 @@ def err_drv_optimization_remark_format : Error< "unknown remark serializer format: '%0'">; def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">; def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">; -def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">; def err_drv_omp_host_ir_file_not_found : Error< "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">; def err_drv_omp_host_target_not_supported : Error< diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bbdf0e3be7ae0..b402f53cc765b 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3206,14 +3206,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, TT.getArch() == llvm::Triple::x86 || TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); - else if ((T.isArch64Bit() && TT.isArch32Bit()) || - (T.isArch64Bit() && TT.isArch16Bit()) || - (T.isArch32Bit() && TT.isArch64Bit()) || - (T.isArch32Bit() && TT.isArch16Bit()) || - (T.isArch16Bit() && TT.isArch32Bit()) || - (T.isArch16Bit() && TT.isArch64Bit())) - Diags.Report(diag::err_drv_incompatible_omp_arch) - << A->getValue(i) << T.str(); else Opts.OMPTargetTriples.push_back(TT); } diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 031c7b6c778e4..aefe00f1cadf9 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/clang/test/OpenMP/target_incompatible_architecture_messages.cpp b/clang/test/OpenMP/target_incompatible_architecture_messages.cpp deleted file mode 100644 index f0f9d236d764d..0000000000000 --- a/clang/test/OpenMP/target_incompatible_architecture_messages.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: not %clang_cc1 -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -x c++ -fopenmp -triple i386-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -o - %s 2>&1 | FileCheck %s -// CHECK: error: OpenMP target architecture '{{.+}}' pointer size is incompatible with host '{{.+}}' -#ifndef HEADER -#define HEADER - -void test() { -#pragma omp target - {} -} - -#endif From 81921ebc430536ae5718da70a54328c790c8ae19 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 30 Sep 2020 15:09:21 -0400 Subject: [PATCH 198/544] [CodeGen] improve coverage for float (32-bit) type of NAN; NFC Goes with D88238 --- clang/test/CodeGen/builtin-nan-exception.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/builtin-nan-exception.c b/clang/test/CodeGen/builtin-nan-exception.c index 2acf0c4390ec8..a0de25e52ebe6 100644 --- a/clang/test/CodeGen/builtin-nan-exception.c +++ b/clang/test/CodeGen/builtin-nan-exception.c @@ -5,18 +5,28 @@ // Run a variety of targets to ensure there's no target-based difference. -// The builtin always produces a 64-bit (double). // An SNaN with no payload is formed by setting the bit after the // the quiet bit (MSB of the significand). // CHECK: float 0x7FF8000000000000, float 0x7FF4000000000000 -// CHECK: double 0x7FF8000000000000, double 0x7FF4000000000000 float f[] = { + __builtin_nanf(""), + __builtin_nansf(""), +}; + + +// Doubles are created and converted to floats. + +// CHECK: float 0x7FF8000000000000, float 0x7FF4000000000000 + +float converted_to_float[] = { __builtin_nan(""), __builtin_nans(""), }; +// CHECK: double 0x7FF8000000000000, double 0x7FF4000000000000 + double d[] = { __builtin_nan(""), __builtin_nans(""), From 1b60f63e4fd041550019b692dc7bf490dce2c75c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 30 Sep 2020 15:11:51 -0400 Subject: [PATCH 199/544] Revert "[OpenMP] Replace OpenMP RTL Functions With OMPIRBuilder and OMPKinds.def" Failing tests on Arm due to the tests automatically populating incomatible pointer width architectures. Reverting until the tests are updated. Failing tests: OpenMP/distribute_parallel_for_num_threads_codegen.cpp OpenMP/distribute_parallel_for_if_codegen.cpp OpenMP/distribute_parallel_for_simd_if_codegen.cpp OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp OpenMP/teams_distribute_parallel_for_if_codegen.cpp OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp This reverts commit 90eaedda9b8ef46e2c0c1b8bce33e98a3adbb68c. --- clang/lib/CodeGen/CGOpenMPRuntime.h | 5 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 590 +++++++++++++++--- clang/lib/CodeGen/CodeGenModule.h | 10 + clang/test/OpenMP/nvptx_parallel_codegen.cpp | 8 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 55 +- llvm/test/Transforms/OpenMP/add_attributes.ll | 338 +++++----- 6 files changed, 687 insertions(+), 319 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index e39c2e11390e1..41fa9f5345aa8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -306,9 +306,6 @@ class CGOpenMPRuntime { CodeGenModule &CGM; StringRef FirstSeparator, Separator; - /// An OpenMP-IR-Builder instance. - llvm::OpenMPIRBuilder OMPBuilder; - /// Constructor allowing to redefine the name separator for the variables. explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator); @@ -389,6 +386,8 @@ class CGOpenMPRuntime { llvm::Value *getCriticalRegionLock(StringRef CriticalName); private: + /// An OpenMP-IR-Builder instance. + llvm::OpenMPIRBuilder OMPBuilder; /// Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap OpenMPDebugLocMapTy; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index dbd24d33cc376..d9ef6c2a10789 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -28,6 +28,96 @@ using namespace CodeGen; using namespace llvm::omp; namespace { +enum OpenMPRTLFunctionNVPTX { + /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit, + /// int16_t RequiresOMPRuntime); + OMPRTL_NVPTX__kmpc_kernel_init, + /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); + OMPRTL_NVPTX__kmpc_kernel_deinit, + /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); + OMPRTL_NVPTX__kmpc_spmd_kernel_init, + /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, + /// Call to void __kmpc_kernel_prepare_parallel(void + /// *outlined_function); + OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, + /// Call to bool __kmpc_kernel_parallel(void **outlined_function); + OMPRTL_NVPTX__kmpc_kernel_parallel, + /// Call to void __kmpc_kernel_end_parallel(); + OMPRTL_NVPTX__kmpc_kernel_end_parallel, + /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL_NVPTX__kmpc_serialized_parallel, + /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL_NVPTX__kmpc_end_serialized_parallel, + /// Call to int32_t __kmpc_shuffle_int32(int32_t element, + /// int16_t lane_offset, int16_t warp_size); + OMPRTL_NVPTX__kmpc_shuffle_int32, + /// Call to int64_t __kmpc_shuffle_int64(int64_t element, + /// int16_t lane_offset, int16_t warp_size); + OMPRTL_NVPTX__kmpc_shuffle_int64, + /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); + OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, + /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 + /// global_tid, void *global_buffer, int32_t num_of_records, void* + /// reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void + /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), + /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, + /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, + /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void + /// *buffer, int idx, void *reduce_data)); + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, + /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); + OMPRTL_NVPTX__kmpc_end_reduce_nowait, + /// Call to void __kmpc_data_sharing_init_stack(); + OMPRTL_NVPTX__kmpc_data_sharing_init_stack, + /// Call to void __kmpc_data_sharing_init_stack_spmd(); + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, + /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, + /// int16_t UseSharedMemory); + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, + /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t + /// UseSharedMemory); + OMPRTL_NVPTX__kmpc_data_sharing_push_stack, + /// Call to void __kmpc_data_sharing_pop_stack(void *a); + OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, + /// Call to void __kmpc_begin_sharing_variables(void ***args, + /// size_t n_args); + OMPRTL_NVPTX__kmpc_begin_sharing_variables, + /// Call to void __kmpc_end_sharing_variables(); + OMPRTL_NVPTX__kmpc_end_sharing_variables, + /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs) + OMPRTL_NVPTX__kmpc_get_shared_variables, + /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL_NVPTX__kmpc_parallel_level, + /// Call to int8_t __kmpc_is_spmd_exec_mode(); + OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, + /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, + /// const void *buf, size_t size, int16_t is_shared, const void **res); + OMPRTL_NVPTX__kmpc_get_team_static_memory, + /// Call to void __kmpc_restore_team_static_memory(int16_t + /// isSPMDExecutionMode, int16_t is_shared); + OMPRTL_NVPTX__kmpc_restore_team_static_memory, + /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_barrier_simple_spmd, + /// Call to int32_t __kmpc_warp_active_thread_mask(void); + OMPRTL_NVPTX__kmpc_warp_active_thread_mask, + /// Call to void __kmpc_syncwarp(int32_t Mask); + OMPRTL_NVPTX__kmpc_syncwarp, +}; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { @@ -1153,13 +1243,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {getThreadLimit(CGF), Bld.getInt16(/*RequiresOMPRuntime=*/1)}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_kernel_init), - Args); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack)); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); emitGenericVarsProlog(CGF, WST.Loc); } @@ -1182,9 +1272,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF, // Signal termination condition. // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_kernel_deinit), - Args); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. @@ -1258,14 +1347,13 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader( /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), /*RequiresDataSharing=*/Bld.getInt16(0)}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init), - Args); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); if (RequiresFullRuntime) { // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd)); + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); } CGF.EmitBranch(ExecuteBB); @@ -1291,9 +1379,9 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF, // DeInitialize the OMP state in the runtime; called by all active threads. llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2), - Args); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1327,7 +1415,7 @@ void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) { } void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, - WorkerFunctionState &WST) { + WorkerFunctionState &WST) { // // The workers enter this loop and wait for parallel work from the master. // When the master encounters a parallel region it sets up the work + variable @@ -1362,10 +1450,8 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {WorkFn.getPointer()}; - llvm::Value *Ret = - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_kernel_parallel), - Args); + llvm::Value *Ret = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. @@ -1430,9 +1516,9 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // Signal end of parallel region. CGF.EmitBlock(TerminateBB); - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel), - llvm::None); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), + llvm::None); CGF.EmitBranch(BarrierBB); // All active and inactive workers wait at a barrier after parallel region. @@ -1447,6 +1533,328 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, clearLocThreadIdInsertPt(CGF); } +/// Returns specified OpenMP runtime function for the current OpenMP +/// implementation. Specialized for the NVPTX device. +/// \param Function OpenMP runtime function. +/// \return Specified function. +llvm::FunctionCallee +CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) { + llvm::FunctionCallee RTLFn = nullptr; + switch (static_cast(Function)) { + case OMPRTL_NVPTX__kmpc_kernel_init: { + // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t + // RequiresOMPRuntime); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); + break; + } + case OMPRTL_NVPTX__kmpc_kernel_deinit: { + // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); + llvm::Type *TypeParams[] = {CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); + break; + } + case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { + // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, + // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); + break; + } + case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { + // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); + llvm::Type *TypeParams[] = {CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); + break; + } + case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { + /// Build void __kmpc_kernel_prepare_parallel( + /// void *outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_kernel_parallel: { + /// Build bool __kmpc_kernel_parallel(void **outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; + llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); + auto *FnTy = + llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { + /// Build void __kmpc_kernel_end_parallel(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_serialized_parallel: { + // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { + // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_shuffle_int32: { + // Build int32_t __kmpc_shuffle_int32(int32_t element, + // int16_t lane_offset, int16_t warp_size); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); + break; + } + case OMPRTL_NVPTX__kmpc_shuffle_int64: { + // Build int64_t __kmpc_shuffle_int64(int64_t element, + // int16_t lane_offset, int16_t warp_size); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); + break; + } + case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { + // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, + // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* + // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t + // lane_id, int16_t lane_offset, int16_t Algorithm Version), void + // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2"); + break; + } + case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { + // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); + llvm::Type *TypeParams[] = {CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); + break; + } + case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { + // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 + // global_tid, void *global_buffer, int32_t num_of_records, void* + // reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t shortCircuit), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void + // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), + // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, + // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, + // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void + // *buffer, int idx, void *reduce_data)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, + CGM.VoidPtrTy}; + auto *GlobalListFnTy = + llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { + /// Build void __kmpc_data_sharing_init_stack(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { + /// Build void __kmpc_data_sharing_init_stack_spmd(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { + // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, + // int16_t UseSharedMemory); + llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { + // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t + // UseSharedMemory); + llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { + // Build void __kmpc_data_sharing_pop_stack(void *a); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, + /*Name=*/"__kmpc_data_sharing_pop_stack"); + break; + } + case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { + /// Build void __kmpc_begin_sharing_variables(void ***args, + /// size_t n_args); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); + break; + } + case OMPRTL_NVPTX__kmpc_end_sharing_variables: { + /// Build void __kmpc_end_sharing_variables(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); + break; + } + case OMPRTL_NVPTX__kmpc_get_shared_variables: { + /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); + break; + } + case OMPRTL_NVPTX__kmpc_parallel_level: { + // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); + break; + } + case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { + // Build int8_t __kmpc_is_spmd_exec_mode(); + auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); + break; + } + case OMPRTL_NVPTX__kmpc_get_team_static_memory: { + // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, + // const void *buf, size_t size, int16_t is_shared, const void **res); + llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, + CGM.Int16Ty, CGM.VoidPtrPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); + break; + } + case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { + // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, + // int16_t is_shared); + llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); + break; + } + case OMPRTL__kmpc_barrier: { + // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); + break; + } + case OMPRTL__kmpc_barrier_simple_spmd: { + // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateConvergentRuntimeFunction( + FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); + break; + } + case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { + // Build int32_t __kmpc_warp_active_thread_mask(void); + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); + RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); + break; + } + case OMPRTL_NVPTX__kmpc_syncwarp: { + // Build void __kmpc_syncwarp(kmp_int32 Mask); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); + RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); + break; + } + } + return RTLFn; +} + void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t, @@ -1749,14 +2157,12 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_parallel_level), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), {RTLoc, ThreadID}); IsTTD = Bld.CreateIsNull(PL); } - llvm::Value *IsSPMD = Bld.CreateIsNotNull( - CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -1790,8 +2196,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -1853,10 +2259,9 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, CGM.Int16Ty, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_get_team_static_memory), - GlobalRecordSizeArg); + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_get_team_static_memory), + GlobalRecordSizeArg); GlobalizedRecords.back().Buffer = StaticGlobalized; GlobalizedRecords.back().RecSize = RecSize; GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; @@ -1883,10 +2288,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), - IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack - : OMPRTL___kmpc_data_sharing_coalesced_push_stack), + createNVPTXRuntimeFunction( + IsInTTDRegion + ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack + : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -1985,8 +2390,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -2014,8 +2419,7 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, for (llvm::Value *Addr : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), Addr); } if (I->getSecond().GlobalRecordAddr) { @@ -2030,8 +2434,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(NonSPMDBB); CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { @@ -2052,15 +2456,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), IsInSharedMemory}; CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_restore_team_static_memory), Args); } } else { - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); } } } @@ -2132,11 +2535,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_serialized_parallel), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), Args, - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2152,8 +2553,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( // Prepare for parallel region. Indicate the outlined function. llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); // Create a private scope that will globalize the arguments @@ -2170,10 +2570,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *DataSharingArgs[] = { SharedArgsPtr, llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables), - DataSharingArgs); + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_begin_sharing_variables), + DataSharingArgs); // Store variable address in a list of references to pass to workers. unsigned Idx = 0; @@ -2207,8 +2606,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( syncCTAThreads(CGF); if (!CapturedVars.empty()) - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_end_sharing_variables)); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); // Remember for post-processing in worker loop. Work.emplace_back(WFn); @@ -2232,9 +2631,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); - llvm::Value *IsSPMD = Bld.CreateIsNotNull( - CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2242,8 +2640,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_parallel_level), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); Bld.CreateCondBr(Res, SeqBB, MasterBB); @@ -2307,11 +2704,9 @@ void CGOpenMPRuntimeGPU::emitSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_serialized_parallel), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), Args, - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2341,9 +2736,9 @@ void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) { llvm::ConstantPointerNull::get( cast(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd), - Args); + llvm::CallInst *Call = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); + Call->setConvergent(); } void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, @@ -2357,10 +2752,9 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_barrier), - Args); + llvm::CallInst *Call = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); + Call->setConvergent(); } void CGOpenMPRuntimeGPU::emitCriticalRegion( @@ -2376,8 +2770,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); // Get the mask of active threads in the warp. - llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask)); + llvm::Value *Mask = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = RT.getGPUThreadID(CGF); @@ -2419,9 +2813,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); // Reconverge active threads in the warp. - (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_syncwarp), - Mask); + (void)CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2471,15 +2864,14 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; CGOpenMPRuntimeGPU &RT = *(static_cast(&CGM.getOpenMPRuntime())); - llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder(); CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); assert(Size.getQuantity() <= 8 && "Unsupported bitwidth in shuffle instruction."); - RuntimeFunction ShuffleFn = Size.getQuantity() <= 4 - ? OMPRTL___kmpc_shuffle_int32 - : OMPRTL___kmpc_shuffle_int64; + OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 + ? OMPRTL_NVPTX__kmpc_shuffle_int32 + : OMPRTL_NVPTX__kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. QualType CastTy = CGF.getContext().getIntTypeForBitwidth( @@ -2489,8 +2881,7 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn), - {ElemCast, Offset, WarpSize}); + RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); } @@ -4000,8 +4391,8 @@ void CGOpenMPRuntimeGPU::emitReduction( InterWarpCopyFn}; Res = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), Args); } else { assert(TeamsReduction && "expected teams reduction."); @@ -4050,8 +4441,8 @@ void CGOpenMPRuntimeGPU::emitReduction( BufferToGlobalRedFn}; Res = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -4086,8 +4477,7 @@ void CGOpenMPRuntimeGPU::emitReduction( RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( nullptr, llvm::None, - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), EndArgs); RCG.setAction(Action); RCG(CGF); @@ -4098,7 +4488,7 @@ void CGOpenMPRuntimeGPU::emitReduction( const VarDecl * CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD, - const VarDecl *NativeParam) const { + const VarDecl *NativeParam) const { if (!NativeParam->getType()->isReferenceType()) return NativeParam; QualType ArgType = NativeParam->getType(); @@ -4248,9 +4638,9 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 088ed2830fb81..19085b582f5a0 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1068,6 +1068,16 @@ class CodeGenModule : public CodeGenTypeCache { llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false, bool AssumeConvergent = false); + /// Create or return a runtime function declaration with the specified type + /// and name. This will automatically add the convergent attribute to the + /// function declaration. + llvm::FunctionCallee CreateConvergentRuntimeFunction( + llvm::FunctionType *Ty, StringRef Name, + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), + bool Local = false) { + return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true); + } + /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index bd9c988d46e7a..ad25e0d775d12 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -91,7 +91,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] -// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 @@ -321,10 +321,10 @@ int bar(int n){ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]] // CHECK: ret void -// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]] +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]] // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}( @@ -377,6 +377,6 @@ int bar(int n){ // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]] // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]] -// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}} +// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}} #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index ff5e69df32616..e93f836ea3fad 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -220,9 +220,6 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr) __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr) __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /* kmp_task_t */ VoidPtr) -__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16) -__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32) -__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr) #undef __OMP_FUNCTION_TYPE #undef OMP_FUNCTION_TYPE @@ -314,6 +311,8 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr, Int32, /* kmp_task_affinity_info_t */ VoidPtr) @@ -519,42 +518,17 @@ __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) -/// OpenMP Device runtime functions -__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16) -__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16) -__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16) -__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16) -__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) -__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) -__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) -__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16) -__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32, - Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr) -__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32) -__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32, - VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr, - GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) - -__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) __OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) - -__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16) +__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) +__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, + Int16) __OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr) -__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) -__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) -__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) -__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32) -__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) -__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy, - Int16, VoidPtrPtr) -__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) -__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, ) -__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) + +/// Note that device runtime functions (in the following) do not necessarily +/// need attributes as we expect to see the definitions. +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__last, false, Void, ) @@ -603,8 +577,8 @@ __OMP_ATTRS_SET(DefaultAttrs, __OMP_ATTRS_SET(BarrierAttrs, OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) - : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) + ? AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, OptimisticAttributes @@ -676,11 +650,6 @@ __OMP_ATTRS_SET(ReturnAlignedPtrAttrs, __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), - ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index cf1bd246d2b29..e92447d79feac 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -888,313 +888,313 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: declare dso_local i32 @omp_pause_resource_all(i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() +; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) +; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) +; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) +; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) +; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 -; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) +; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) @@ -1212,52 +1212,52 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority() @@ -1326,7 +1326,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device() @@ -1356,25 +1356,25 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*) @@ -1419,7 +1419,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() +; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) @@ -1427,7 +1427,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1451,13 +1451,13 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1466,22 +1466,22 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1523,10 +1523,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1535,10 +1535,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn @@ -1598,7 +1598,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1622,7 +1622,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1634,16 +1634,16 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) -; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn From e9b38841619f20a6f4c8657880fd487083ba499a Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 29 Sep 2020 15:19:54 +0200 Subject: [PATCH 200/544] Add GDB prettyprinters for a few more MLIR types. Reviewed By: dblaikie, jpienaar Differential Revision: https://reviews.llvm.org/D87159 --- debuginfo-tests/CMakeLists.txt | 10 + debuginfo-tests/lit.cfg.py | 6 +- debuginfo-tests/lit.site.cfg.py.in | 2 + .../llvm-prettyprinters/gdb/lit.local.cfg | 4 + .../llvm-prettyprinters/gdb/llvm-support.cpp | 15 +- .../llvm-prettyprinters/gdb/mlir-support.cpp | 41 +++ .../llvm-prettyprinters/gdb/mlir-support.gdb | 112 +++++++++ mlir/utils/gdb-scripts/prettyprinters.py | 235 ++++++++++++++++++ 8 files changed, 417 insertions(+), 8 deletions(-) create mode 100644 debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp create mode 100644 debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb create mode 100644 mlir/utils/gdb-scripts/prettyprinters.py diff --git a/debuginfo-tests/CMakeLists.txt b/debuginfo-tests/CMakeLists.txt index d3ac0a4aad90a..4b6af5212fc8c 100644 --- a/debuginfo-tests/CMakeLists.txt +++ b/debuginfo-tests/CMakeLists.txt @@ -22,6 +22,16 @@ set(DEBUGINFO_TEST_DEPS not ) +if ("mlir" IN_LIST LLVM_ENABLE_PROJECTS) + add_llvm_executable(check-gdb-mlir-support + llvm-prettyprinters/gdb/mlir-support.cpp + ) + target_include_directories(check-gdb-mlir-support PRIVATE ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}/include) + target_link_libraries(check-gdb-mlir-support PRIVATE MLIRIR) + list(APPEND DEBUGINFO_TEST_DEPS check-gdb-mlir-support) + set(MLIR_SOURCE_DIR ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}) +endif() + if("compiler-rt" IN_LIST LLVM_ENABLE_PROJECTS) # llgdb-tests/asan.c and other asan* files. if(TARGET asan) diff --git a/debuginfo-tests/lit.cfg.py b/debuginfo-tests/lit.cfg.py index 4c45b723d2e9a..0e436506a96bd 100644 --- a/debuginfo-tests/lit.cfg.py +++ b/debuginfo-tests/lit.cfg.py @@ -157,6 +157,6 @@ def get_required_attr(config, attr_name): if apple_lldb_vers < 1000: config.available_features.add('apple-lldb-pre-1000') -llvm_config.feature_config([('--build-mode', { - 'Debug|RelWithDebInfo': 'debug-info' -})]) +llvm_config.feature_config( + [('--build-mode', {'Debug|RelWithDebInfo': 'debug-info'})] +) diff --git a/debuginfo-tests/lit.site.cfg.py.in b/debuginfo-tests/lit.site.cfg.py.in index d5893f577aed0..4ed49b83bc35f 100644 --- a/debuginfo-tests/lit.site.cfg.py.in +++ b/debuginfo-tests/lit.site.cfg.py.in @@ -20,6 +20,8 @@ config.target_triple = "@TARGET_TRIPLE@" config.host_arch = "@HOST_ARCH@" config.is_msvc = lit.util.pythonize_bool("@MSVC@") +config.mlir_src_root = "@MLIR_SOURCE_DIR@" + config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.python3_executable = "@Python3_EXECUTABLE@" diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg b/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg index be053e06f59e7..a4200fb726c2f 100644 --- a/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg +++ b/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg @@ -4,6 +4,10 @@ import lit.util if 'native' not in config.available_features or lit.util.which('gdb') is None: config.unsupported = True +if config.mlir_src_root: + config.substitutions.append(("%mlir_src_root", config.mlir_src_root)) + config.available_features.add('mlir') + config.suffixes = ['.gdb'] diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp index 04c84cd149b87..a6e535b56833e 100644 --- a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp +++ b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp @@ -53,8 +53,13 @@ auto SimpleIlist = []() { return Result; }(); -// Check expected instances to avoid compile errors. -auto CheckExpectedValue = static_cast(ExpectedValue); -auto CheckExpectedError = static_cast(ExpectedError); - -int main() { return 0; } +int main() { + // Reference symbols that might otherwise be stripped. + ArrayRef[0]; + MutableArrayRef[0]; + !ExpectedValue; + !ExpectedError; + *OptionalValue; + *OptionalNone; + return 0; +} diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp new file mode 100644 index 0000000000000..548a9ed2a206e --- /dev/null +++ b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp @@ -0,0 +1,41 @@ +#include "mlir/IR/Identifier.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/StandardTypes.h" + +mlir::MLIRContext Context; + +auto Identifier = mlir::Identifier::get("foo", &Context); +mlir::OperationName OperationName("FooOp", &Context); +mlir::Value Value({reinterpret_cast(0x8), + mlir::Value::Kind::TrailingOpResult}); + +mlir::Type Type(nullptr); +mlir::Type IndexType = mlir::IndexType::get(&Context); +mlir::Type IntegerType = + mlir::IntegerType::get(3, mlir::IntegerType::Unsigned, &Context); +mlir::Type FloatType = mlir::Float32Type::get(&Context); +mlir::Type MemRefType = mlir::MemRefType::get({4, 5}, FloatType); +mlir::Type UnrankedMemRefType = mlir::UnrankedMemRefType::get(IntegerType, 6); +mlir::Type VectorType = mlir::VectorType::get({1, 2}, FloatType); +mlir::Type TupleType = + mlir::TupleType::get(mlir::TypeRange({IndexType, FloatType}), &Context); + +auto UnknownLoc = mlir::UnknownLoc::get(&Context); +auto FileLineColLoc = mlir::FileLineColLoc::get("file", 7, 8, &Context); +auto OpaqueLoc = mlir::OpaqueLoc::get(9, &Context); +auto NameLoc = mlir::NameLoc::get(Identifier, &Context); +auto CallSiteLoc = mlir::CallSiteLoc::get(FileLineColLoc, OpaqueLoc); +auto FusedLoc = mlir::FusedLoc::get({FileLineColLoc, NameLoc}, &Context); + +mlir::Attribute UnitAttr = mlir::UnitAttr::get(&Context); +mlir::Attribute FloatAttr = mlir::FloatAttr::get(FloatType, 1.0); +mlir::Attribute IntegerAttr = mlir::IntegerAttr::get(IntegerType, 10); +mlir::Attribute TypeAttr = mlir::TypeAttr::get(IndexType); +mlir::Attribute ArrayAttr = mlir::ArrayAttr::get({UnitAttr}, &Context); +mlir::Attribute StringAttr = mlir::StringAttr::get("foo", &Context); +mlir::Attribute ElementsAttr = mlir::DenseElementsAttr::get( + VectorType.cast(), llvm::ArrayRef{2.0f, 3.0f}); + +int main() { return 0; } diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb new file mode 100644 index 0000000000000..2a82412401231 --- /dev/null +++ b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb @@ -0,0 +1,112 @@ +# RUN: gdb -q -batch -n -iex 'source %mlir_src_root/utils/gdb-scripts/prettyprinters.py' -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-mlir-support | FileCheck %s +# REQUIRES: debug-info +# REQUIRES: mlir + +break main +run + +# CHECK: "foo" +p Identifier + +# CHECK: "FooOp" +p OperationName + +# CHECK: 0x8 +# CHECK: TrailingOpResult +p Value + +# CHECK: impl = 0x0 +p Type + +# CHECK: cast +p IndexType + +# CHECK: cast +# CHECK: width = 3 +# CHECK: Unsigned +p IntegerType + +# CHECK: cast +p FloatType + +# CHECK: cast +# CHECK: shapeSize = 2 +# CHECK: shapeElements[0] = 4 +# CHECK: shapeElements[1] = 5 +p MemRefType + +# CHECK: cast +# CHECK: memorySpace = 6 +p UnrankedMemRefType + +# CHECK: cast +# CHECK: shapeSize = 2 +# CHECK: shapeElements[0] = 1 +# CHECK: shapeElements[1] = 2 +p VectorType + +# CHECK: cast +# CHECK: numElements = 2 +# CHECK: elements[0] +# CHECK: mlir::IndexType +# CHECK: elements[1] +# CHECK: mlir::Float32Type +p TupleType + +# CHECK: cast +p UnknownLoc + +# CHECK: cast +# CHECK: filename = "file" +# CHECK: line = 7 +# CHECK: column = 8 +p FileLineColLoc + +# CHECK: cast +# CHECK: underlyingLocation = 9 +p OpaqueLoc + +# CHECK: cast +# CHECK: name = "foo" +# CHECK: mlir::UnknownLoc +p NameLoc + +# CHECK: cast +# CHECK: callee +# CHECK: mlir::FileLineColLoc +# CHECK: caller +# CHECK: mlir::OpaqueLoc +p CallSiteLoc + +# CHECK: cast +# CHECK: numLocs = 2 +# CHECK: locs[0] +# CHECK: mlir::FileLineColLoc +# CHECK: locs[1] +# CHECK: mlir::NameLoc +p FusedLoc + +# CHECK: cast +p UnitAttr + +# CHECK: cast +p FloatAttr + +# CHECK: cast +p IntegerAttr + +# CHECK: cast +# CHECK: mlir::IndexType +p TypeAttr + +# CHECK: cast +# CHECK: llvm::ArrayRef of length 1 +# CHECK: mlir::UnitAttr +p ArrayAttr + +# CHECK: cast +# CHECK: value = "foo" +p StringAttr + +# CHECK: cast +p ElementsAttr diff --git a/mlir/utils/gdb-scripts/prettyprinters.py b/mlir/utils/gdb-scripts/prettyprinters.py new file mode 100644 index 0000000000000..39246bf6446e2 --- /dev/null +++ b/mlir/utils/gdb-scripts/prettyprinters.py @@ -0,0 +1,235 @@ +"""GDB pretty printers for MLIR types.""" + +import gdb.printing + + +class IdentifierPrinter: + """Prints an mlir::Identifier instance.""" + + def __init__(self, val): + self.entry = val['entry'] + + def to_string(self): + ptr = (self.entry + 1).cast(gdb.lookup_type('char').pointer()) + return ptr.string(length=self.entry['keyLength']) + + def display_hint(self): + return 'string' + + +class StoragePrinter: + """Prints bases of a struct and its fields.""" + + def __init__(self, val): + self.val = val + + def children(self): + for field in self.val.type.fields(): + if field.is_base_class: + yield ('<%s>' % field.name, self.val.cast(field.type)) + else: + yield (field.name, self.val[field.name]) + + +class TupleTypeStoragePrinter(StoragePrinter): + + def children(self): + for child in StoragePrinter.children(self): + yield child + pointer_type = gdb.lookup_type('mlir::Type').pointer() + elements = (self.val.address + 1).cast(pointer_type) + for i in range(self.val['numElements']): + yield 'elements[%u]' % i, elements[i] + + +class RankedTypeStoragePrinter(StoragePrinter): + + def children(self): + for child in StoragePrinter.children(self): + yield child + for i in range(self.val['shapeSize']): + yield 'shapeElements[%u]' % i, self.val['shapeElements'][i] + + +class MemRefTypeStoragePrinter(RankedTypeStoragePrinter): + + def children(self): + for child in RankedTypeStoragePrinter.children(self): + yield child + for i in range(self.val['numAffineMaps']): + yield 'affineMapsList[%u]' % i, self.val['affineMapsList'][i] + + +class FusedLocationStoragePrinter(StoragePrinter): + + def children(self): + for child in StoragePrinter.children(self): + yield child + pointer_type = gdb.lookup_type('mlir::Location').pointer() + elements = (self.val.address + 1).cast(pointer_type) + for i in range(self.val['numLocs']): + yield 'locs[%u]' % i, elements[i] + + +class StorageUserBasePrinter: + """Printer for an mlir::detail::StorageUserBase instance.""" + + def __init__(self, val): + self.val = val + + def children(self): + storage_type = self.val.type.template_argument(2) + yield 'impl', self.val['impl'].dereference().cast(storage_type) + + +class StorageTypeMap: + """Maps a TypeID to the corresponding type derived from StorageUserBase. + + Types need to be registered by name before the first lookup. + """ + + def __init__(self): + self.map = None + self.type_names = [] + + def register_type(self, type_name): + assert not self.map, 'register_type called after __getitem__' + self.type_names += [type_name] + + def _init_map(self): + """Lazy initialization of self.map.""" + if self.map: + return + self.map = {} + for type_name in self.type_names: + concrete_type = gdb.lookup_type(type_name) + storage = gdb.parse_and_eval( + "&'mlir::TypeID::get<%s>()::instance'" % type_name) + if concrete_type and storage: + self.map[int(storage)] = concrete_type + + def __getitem__(self, type_id): + self._init_map() + return self.map.get(int(type_id['storage'])) + + +storage_type_map = StorageTypeMap() + + +def get_type_id_printer(val): + """Returns a printer of the name of a mlir::TypeID.""" + + class StringPrinter: + + def __init__(self, string): + self.string = string + + def to_string(self): + return self.string + + concrete_type = storage_type_map[val] + if not concrete_type: + return None + return StringPrinter('"%s"' % concrete_type.name) + + +def get_attr_or_type_printer(val, get_type_id): + """Returns a printer for mlir::Attribute or mlir::Type.""" + + class UpcastPrinter: + + def __init__(self, val, type): + self.val = val.cast(type) + + def children(self): + yield 'cast<%s>' % self.val.type.name, self.val + + if not val['impl']: + return None + type_id = get_type_id(val['impl'].dereference()) + concrete_type = storage_type_map[type_id] + if not concrete_type: + return None + return UpcastPrinter(val, concrete_type) + + +pp = gdb.printing.RegexpCollectionPrettyPrinter('MLIRSupport') + +pp.add_printer('mlir::Identifier', '^mlir::Identifier$', IdentifierPrinter) + +# Printers for types deriving from AttributeStorage or TypeStorage. +pp.add_printer('mlir::detail::FusedLocationStorage', + '^mlir::detail::FusedLocationStorage', + FusedLocationStoragePrinter) +pp.add_printer('mlir::detail::VectorTypeStorage', + '^mlir::detail::VectorTypeStorage', RankedTypeStoragePrinter) +pp.add_printer('mlir::detail::RankedTensorTypeStorage', + '^mlir::detail::RankedTensorTypeStorage', + RankedTypeStoragePrinter) +pp.add_printer('mlir::detail::MemRefTypeStorage', + '^mlir::detail::MemRefTypeStorage$', MemRefTypeStoragePrinter) +pp.add_printer('mlir::detail::TupleTypeStorage', + '^mlir::detail::TupleTypeStorage$', TupleTypeStoragePrinter) + +# Printers for Attribute::AttrBase or Type::TypeBase typedefs. +pp.add_printer('mlir::detail::StorageUserBase', + '^mlir::detail::StorageUserBase<.*>$', StorageUserBasePrinter) + +# Printers of types deriving from Attribute::AttrBase or Type::TypeBase. +for name in [ + # mlir/IR/Attributes.h + 'ArrayAttr', + 'DictionaryAttr', + 'FloatAttr', + 'IntegerAttr', + 'IntegerSetAttr', + 'OpaqueAttr', + 'StringAttr', + 'SymbolRefAttr', + 'TypeAttr', + 'UnitAttr', + 'DenseStringElementsAttr', + 'DenseIntOrFPElementsAttr', + 'OpaqueElementsAttr', + 'SparseElementsAttr', + # mlir/IR/StandardTypes.h + 'ComplexType', + 'IndexType', + 'IntegerType', + 'Float16Type', + 'Float32Type', + 'Float64Type', + 'NoneType', + 'VectorType', + 'RankedTensorType', + 'UnrankedTensorType', + 'MemRefType', + 'UnrankedMemRefType', + 'TupleType', + # mlir/IR/Location.h + 'CallSiteLoc', + 'FileLineColLoc', + 'FusedLoc', + 'NameLoc', + 'OpaqueLoc', + 'UnknownLoc' +]: + storage_type_map.register_type('mlir::%s' % name) # Register for upcasting. + +pp.add_printer('mlir::TypeID', '^mlir::TypeID$', get_type_id_printer) + + +def add_attr_or_type_printers(name): + """Adds printers for mlir::Attribute or mlir::Type and their Storage type.""" + get_type_id = lambda val: val['abstract%s' % name]['typeID'] + pp.add_printer('mlir::%s' % name, '^mlir::%s$' % name, + lambda val: get_attr_or_type_printer(val, get_type_id)) + pp.add_printer('mlir::%sStorage' % name, '^mlir::%sStorage$' % name, + lambda val: get_type_id_printer(get_type_id(val))) + + +# Upcasting printers of mlir::Attribute and mlir::Type. +for name in ['Attribute', 'Type']: + add_attr_or_type_printers(name) + +gdb.printing.register_pretty_printer(gdb.current_objfile(), pp) From dd14e5825209386129770296f9bc3a14ab0b4592 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Wed, 30 Sep 2020 12:58:24 -0700 Subject: [PATCH 201/544] [mlir][vector] First step of vector distribution transformation This is the first of several steps to support distributing large vectors. This adds instructions extract_map and insert_map that allow us to do incremental lowering. Right now the transformation only apply to simple pointwise operation with a vector size matching the multiplicity of the IDs used to distribute the vector. This can be used to distribute large vectors to loops or SPMD. Differential Revision: https://reviews.llvm.org/D88341 --- mlir/include/mlir/Dialect/Vector/VectorOps.td | 105 ++++++++++++++++++ .../mlir/Dialect/Vector/VectorTransforms.h | 41 +++++++ mlir/lib/Dialect/Vector/VectorOps.cpp | 47 ++++++++ mlir/lib/Dialect/Vector/VectorTransforms.cpp | 34 ++++++ mlir/test/Dialect/Vector/invalid.mlir | 28 +++++ mlir/test/Dialect/Vector/ops.mlir | 11 ++ .../Dialect/Vector/vector-distribution.mlir | 13 +++ .../lib/Transforms/TestVectorTransforms.cpp | 26 +++++ 8 files changed, 305 insertions(+) create mode 100644 mlir/test/Dialect/Vector/vector-distribution.mlir diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index f74c8687bf531..42e947071403f 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -454,6 +454,71 @@ def Vector_ExtractSlicesOp : }]; } +def Vector_ExtractMapOp : + Vector_Op<"extract_map", [NoSideEffect]>, + Arguments<(ins AnyVector:$vector, Index:$id, I64Attr:$multiplicity)>, + Results<(outs AnyVector)> { + let summary = "vector extract map operation"; + let description = [{ + Takes an 1-D vector and extract a sub-part of the vector starting at id with + a size of `vector size / multiplicity`. This maps a given multiplicity of + the vector to a Value such as a loop induction variable or an SPMD id. + + Similarly to vector.tuple_get, this operation is used for progressive + lowering and should be folded away before converting to LLVM. + + + For instance, the following code: + ```mlir + %a = vector.transfer_read %A[%c0]: memref<32xf32>, vector<32xf32> + %b = vector.transfer_read %B[%c0]: memref<32xf32>, vector<32xf32> + %c = addf %a, %b: vector<32xf32> + vector.transfer_write %c, %C[%c0]: memref<32xf32>, vector<32xf32> + ``` + can be rewritten to: + ```mlir + %a = vector.transfer_read %A[%c0]: memref<32xf32>, vector<32xf32> + %b = vector.transfer_read %B[%c0]: memref<32xf32>, vector<32xf32> + %ea = vector.extract_map %a[%id : 32] : vector<32xf32> to vector<1xf32> + %eb = vector.extract_map %b[%id : 32] : vector<32xf32> to vector<1xf32> + %ec = addf %ea, %eb : vector<1xf32> + %c = vector.insert_map %ec, %id, 32 : vector<1xf32> to vector<32xf32> + vector.transfer_write %c, %C[%c0]: memref<32xf32>, vector<32xf32> + ``` + + Where %id can be an induction variable or an SPMD id going from 0 to 31. + + And then be rewritten to: + ```mlir + %a = vector.transfer_read %A[%id]: memref<32xf32>, vector<1xf32> + %b = vector.transfer_read %B[%id]: memref<32xf32>, vector<1xf32> + %c = addf %a, %b: vector<1xf32> + vector.transfer_write %c, %C[%id]: memref<32xf32>, vector<1xf32> + ``` + + Example: + + ```mlir + %ev = vector.extract_map %v[%id:32] : vector<32xf32> to vector<1xf32> + ``` + }]; + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, " # + "Value vector, Value id, int64_t multiplicity">]; + let extraClassDeclaration = [{ + VectorType getSourceVectorType() { + return vector().getType().cast(); + } + VectorType getResultType() { + return getResult().getType().cast(); + } + }]; + let assemblyFormat = [{ + $vector `[` $id `:` $multiplicity `]` attr-dict `:` type($vector) `to` + type(results) + }]; +} + def Vector_FMAOp : Op]>, @@ -626,6 +691,46 @@ def Vector_InsertSlicesOp : }]; } +def Vector_InsertMapOp : + Vector_Op<"insert_map", [NoSideEffect]>, + Arguments<(ins AnyVector:$vector, Index:$id, I64Attr:$multiplicity)>, + Results<(outs AnyVector)> { + let summary = "vector insert map operation"; + let description = [{ + insert an 1-D vector and within a larger vector starting at id. The new + vector created will have a size of `vector size * multiplicity`. This + represents how a sub-part of the vector is written for a given Value such as + a loop induction variable or an SPMD id. + + Similarly to vector.tuple_get, this operation is used for progressive + lowering and should be folded away before converting to LLVM. + + This operations is meant to be used in combination with vector.extract_map. + See example in extract.map description. + + Example: + + ```mlir + %v = vector.insert_map %ev, %id, 32 : vector<1xf32> to vector<32xf32> + ``` + }]; + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, " # + "Value vector, Value id, int64_t multiplicity">]; + let extraClassDeclaration = [{ + VectorType getSourceVectorType() { + return vector().getType().cast(); + } + VectorType getResultType() { + return getResult().getType().cast(); + } + }]; + let assemblyFormat = [{ + $vector `,` $id `,` $multiplicity attr-dict `:` type($vector) `to` + type(results) + }]; +} + def Vector_InsertStridedSliceOp : Vector_Op<"insert_strided_slice", [NoSideEffect, PredOpTrait<"operand #0 and result have same element type", diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h index 9587c56c0255c..da9650c67efb8 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -172,6 +172,47 @@ struct VectorTransferFullPartialRewriter : public RewritePattern { FilterConstraintType filter; }; +struct DistributeOps { + ExtractMapOp extract; + InsertMapOp insert; +}; + +/// Distribute a 1D vector pointwise operation over a range of given IDs taking +/// *all* values in [0 .. multiplicity - 1] (e.g. loop induction variable or +/// SPMD id). This transformation only inserts +/// vector.extract_map/vector.insert_map. It is meant to be used with +/// canonicalizations pattern to propagate and fold the vector +/// insert_map/extract_map operations. +/// Transforms: +// %v = addf %a, %b : vector<32xf32> +/// to: +/// %v = addf %a, %b : vector<32xf32> %ev = +/// vector.extract_map %v, %id, 32 : vector<32xf32> into vector<1xf32> %nv = +/// vector.insert_map %ev, %id, 32 : vector<1xf32> into vector<32xf32> +Optional distributPointwiseVectorOp(OpBuilder &builder, + Operation *op, Value id, + int64_t multiplicity); +/// Canonicalize an extra element using the result of a pointwise operation. +/// Transforms: +/// %v = addf %a, %b : vector32xf32> +/// %dv = vector.extract_map %v, %id, 32 : vector<32xf32> into vector<1xf32> +/// to: +/// %da = vector.extract_map %a, %id, 32 : vector<32xf32> into vector<1xf32> +/// %db = vector.extract_map %a, %id, 32 : vector<32xf32> into vector<1xf32> +/// %dv = addf %da, %db : vector<1xf32> +struct PointwiseExtractPattern : public OpRewritePattern { + using FilterConstraintType = std::function; + PointwiseExtractPattern( + MLIRContext *context, FilterConstraintType constraint = + [](ExtractMapOp op) { return success(); }) + : OpRewritePattern(context), filter(constraint) {} + LogicalResult matchAndRewrite(ExtractMapOp extract, + PatternRewriter &rewriter) const override; + +private: + FilterConstraintType filter; +}; + } // namespace vector //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 348ccf8413086..1a83c556d47bb 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -900,6 +900,29 @@ void ExtractSlicesOp::getStrides(SmallVectorImpl &results) { populateFromInt64AttrArray(strides(), results); } +//===----------------------------------------------------------------------===// +// ExtractMapOp +//===----------------------------------------------------------------------===// + +void ExtractMapOp::build(OpBuilder &builder, OperationState &result, + Value vector, Value id, int64_t multiplicity) { + VectorType type = vector.getType().cast(); + VectorType resultType = VectorType::get(type.getNumElements() / multiplicity, + type.getElementType()); + ExtractMapOp::build(builder, result, resultType, vector, id, multiplicity); +} + +static LogicalResult verify(ExtractMapOp op) { + if (op.getSourceVectorType().getShape().size() != 1 || + op.getResultType().getShape().size() != 1) + return op.emitOpError("expects source and destination vectors of rank 1"); + if (op.getResultType().getNumElements() * (int64_t)op.multiplicity() != + op.getSourceVectorType().getNumElements()) + return op.emitOpError("vector sizes mismatch. Source size must be equal " + "to destination size * multiplicity"); + return success(); +} + //===----------------------------------------------------------------------===// // BroadcastOp //===----------------------------------------------------------------------===// @@ -1122,6 +1145,30 @@ void InsertSlicesOp::getStrides(SmallVectorImpl &results) { populateFromInt64AttrArray(strides(), results); } +//===----------------------------------------------------------------------===// +// InsertMapOp +//===----------------------------------------------------------------------===// + +void InsertMapOp::build(OpBuilder &builder, OperationState &result, + Value vector, Value id, int64_t multiplicity) { + VectorType type = vector.getType().cast(); + VectorType resultType = VectorType::get(type.getNumElements() * multiplicity, + type.getElementType()); + InsertMapOp::build(builder, result, resultType, vector, id, multiplicity); +} + +static LogicalResult verify(InsertMapOp op) { + if (op.getSourceVectorType().getShape().size() != 1 || + op.getResultType().getShape().size() != 1) + return op.emitOpError("expected source and destination vectors of rank 1"); + if ((int64_t)op.multiplicity() * op.getSourceVectorType().getNumElements() != + op.getResultType().getNumElements()) + return op.emitOpError( + "vector sizes mismatch. Destination size must be equal " + "to source size * multiplicity"); + return success(); +} + //===----------------------------------------------------------------------===// // InsertStridedSliceOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 5bf7857a66e8f..6a244a454e06d 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -2418,6 +2418,40 @@ LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( return failure(); } +LogicalResult mlir::vector::PointwiseExtractPattern::matchAndRewrite( + ExtractMapOp extract, PatternRewriter &rewriter) const { + Operation *definedOp = extract.vector().getDefiningOp(); + if (!definedOp || definedOp->getNumResults() != 1) + return failure(); + // TODO: Create an interfaceOp for elementwise operations. + if (!isa(definedOp)) + return failure(); + Location loc = extract.getLoc(); + SmallVector extractOperands; + for (OpOperand &operand : definedOp->getOpOperands()) + extractOperands.push_back(rewriter.create( + loc, operand.get(), extract.id(), extract.multiplicity())); + Operation *newOp = cloneOpWithOperandsAndTypes( + rewriter, loc, definedOp, extractOperands, extract.getResult().getType()); + rewriter.replaceOp(extract, newOp->getResult(0)); + return success(); +} + +Optional +mlir::vector::distributPointwiseVectorOp(OpBuilder &builder, Operation *op, + Value id, int64_t multiplicity) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfter(op); + Location loc = op->getLoc(); + Value result = op->getResult(0); + DistributeOps ops; + ops.extract = + builder.create(loc, result, id, multiplicity); + ops.insert = + builder.create(loc, ops.extract, id, multiplicity); + return ops; +} + // TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp). // TODO: Add this as DRR pattern. void mlir::vector::populateVectorToVectorTransformationPatterns( diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 3a081231fc7d1..25e002fed35a0 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1328,3 +1328,31 @@ func @compress_dim_mask_mismatch(%base: memref, %mask: vector<17xi1>, %va // expected-error@+1 {{'vector.compressstore' op expected value dim to match mask dim}} vector.compressstore %base, %mask, %value : memref, vector<17xi1>, vector<16xf32> } + +// ----- + +func @extract_map_rank(%v: vector<2x32xf32>, %id : index) { + // expected-error@+1 {{'vector.extract_map' op expects source and destination vectors of rank 1}} + %0 = vector.extract_map %v[%id : 32] : vector<2x32xf32> to vector<2x1xf32> +} + +// ----- + +func @extract_map_size(%v: vector<63xf32>, %id : index) { + // expected-error@+1 {{'vector.extract_map' op vector sizes mismatch. Source size must be equal to destination size * multiplicity}} + %0 = vector.extract_map %v[%id : 32] : vector<63xf32> to vector<2xf32> +} + +// ----- + +func @insert_map_rank(%v: vector<2x1xf32>, %id : index) { + // expected-error@+1 {{'vector.insert_map' op expected source and destination vectors of rank 1}} + %0 = vector.insert_map %v, %id, 32 : vector<2x1xf32> to vector<2x32xf32> +} + +// ----- + +func @insert_map_size(%v: vector<1xf32>, %id : index) { + // expected-error@+1 {{'vector.insert_map' op vector sizes mismatch. Destination size must be equal to source size * multiplicity}} + %0 = vector.insert_map %v, %id, 32 : vector<1xf32> to vector<64xf32> +} diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 2a62be94e01bd..7315d2189c673 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -432,3 +432,14 @@ func @expand_and_compress(%base: memref, %mask: vector<16xi1>, %passthru: vector.compressstore %base, %mask, %0 : memref, vector<16xi1>, vector<16xf32> return } + +// CHECK-LABEL: @extract_insert_map +func @extract_insert_map(%v: vector<32xf32>, %id : index) -> vector<32xf32> { + // CHECK: %[[V:.*]] = vector.extract_map %{{.*}}[%{{.*}} : 16] : vector<32xf32> to vector<2xf32> + %vd = vector.extract_map %v[%id : 16] : vector<32xf32> to vector<2xf32> + // CHECK: %[[R:.*]] = vector.insert_map %[[V]], %{{.*}}, 16 : vector<2xf32> to vector<32xf32> + %r = vector.insert_map %vd, %id, 16 : vector<2xf32> to vector<32xf32> + // CHECK: return %[[R]] : vector<32xf32> + return %r : vector<32xf32> +} + diff --git a/mlir/test/Dialect/Vector/vector-distribution.mlir b/mlir/test/Dialect/Vector/vector-distribution.mlir new file mode 100644 index 0000000000000..0216a017d7af0 --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-distribution.mlir @@ -0,0 +1,13 @@ +// RUN: mlir-opt %s -test-vector-distribute-patterns | FileCheck %s + +// CHECK-LABEL: func @distribute_vector_add +// CHECK-SAME: (%[[ID:.*]]: index +// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]] : 32] : vector<32xf32> to vector<1xf32> +// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]] : 32] : vector<32xf32> to vector<1xf32> +// CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<1xf32> +// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ID]], 32 : vector<1xf32> to vector<32xf32> +// CHECK-NEXT: return %[[INS]] : vector<32xf32> +func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> { + %0 = addf %A, %B : vector<32xf32> + return %0: vector<32xf32> +} diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index ab8460318b49f..2ffe10bc16824 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -125,6 +125,28 @@ struct TestVectorUnrollingPatterns } }; +struct TestVectorDistributePatterns + : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnFunction() override { + MLIRContext *ctx = &getContext(); + OwningRewritePatternList patterns; + FuncOp func = getFunction(); + func.walk([&](AddFOp op) { + OpBuilder builder(op); + Optional ops = distributPointwiseVectorOp( + builder, op.getOperation(), func.getArgument(0), 32); + assert(ops.hasValue()); + SmallPtrSet extractOp({ops->extract}); + op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp); + }); + patterns.insert(ctx); + applyPatternsAndFoldGreedily(getFunction(), patterns); + } +}; + struct TestVectorTransferFullPartialSplitPatterns : public PassWrapper { @@ -178,5 +200,9 @@ void registerTestVectorConversions() { vectorTransformFullPartialPass("test-vector-transfer-full-partial-split", "Test conversion patterns to split " "transfer ops via scf.if + linalg ops"); + PassRegistration distributePass( + "test-vector-distribute-patterns", + "Test conversion patterns to distribute vector ops in the vector " + "dialect"); } } // namespace mlir From ce5379f0f0675592fd10a522009fd5b1561ca72b Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 30 Sep 2020 13:23:21 -0700 Subject: [PATCH 202/544] [NPM] Add target specific hook to add passes for New Pass Manager The patch adds a new TargetMachine member "registerPassBuilderCallbacks" for targets to add passes to the pass pipeline using the New Pass Manager (similar to adjustPassManager for the Legacy Pass Manager). Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D88138 --- clang/lib/CodeGen/BackendUtil.cpp | 3 +++ llvm/include/llvm/Target/TargetMachine.h | 6 +++++ .../Target/Hexagon/HexagonTargetMachine.cpp | 13 +++++++++ .../lib/Target/Hexagon/HexagonTargetMachine.h | 2 ++ .../Hexagon/registerpassbuildercallbacks.ll | 27 +++++++++++++++++++ llvm/tools/opt/NewPMDriver.cpp | 3 +++ 6 files changed, 54 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/registerpassbuildercallbacks.ll diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d77590cc2adf3..dbd67a6ebe9b5 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1214,6 +1214,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + if (TM) + TM->registerPassBuilderCallbacks(PB, CodeGenOpts.DebugPassManager); + ModulePassManager MPM(CodeGenOpts.DebugPassManager); if (!CodeGenOpts.DisableLLVMPasses) { diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index 60d4fb579bb93..4a91528321076 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -34,6 +34,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class MCSymbol; class raw_pwrite_stream; +class PassBuilder; class PassManagerBuilder; struct PerFunctionMIParsingState; class SMDiagnostic; @@ -294,6 +295,11 @@ class TargetMachine { /// PassManagerBuilder::addExtension. virtual void adjustPassManager(PassManagerBuilder &) {} + /// Allow the target to modify the pass pipeline with New Pass Manager + /// (similar to adjustPassManager for Legacy Pass manager). + virtual void registerPassBuilderCallbacks(PassBuilder &, + bool DebugPassManager) {} + /// Add passes to the specified pass manager to get the specified file /// emitted. Typically this will involve several steps of code generation. /// This method should return true if emission of this file type is not diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index cb3b6fbdd69e5..0f15c46bc8bb6 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -273,6 +274,18 @@ void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) { }); } +void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) { + PB.registerOptimizerLastEPCallback( + [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + LoopPassManager LPM(DebugPassManager); + FunctionPassManager FPM(DebugPassManager); + LPM.addPass(HexagonVectorLoopCarriedReusePass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); +} + TargetTransformInfo HexagonTargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(HexagonTTIImpl(this, F)); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index 7ee4474e90e3f..fa174128f708f 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -37,6 +37,8 @@ class HexagonTargetMachine : public LLVMTargetMachine { static unsigned getModuleMatchQuality(const Module &M); void adjustPassManager(PassManagerBuilder &PMB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) override; TargetPassConfig *createPassConfig(PassManagerBase &PM) override; TargetTransformInfo getTargetTransformInfo(const Function &F) override; diff --git a/llvm/test/CodeGen/Hexagon/registerpassbuildercallbacks.ll b/llvm/test/CodeGen/Hexagon/registerpassbuildercallbacks.ll new file mode 100644 index 0000000000000..18bca19ac245b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/registerpassbuildercallbacks.ll @@ -0,0 +1,27 @@ +; RUN: opt -mtriple=hexagon -disable-verify -debug-pass-manager \ +; RUN: -disable-output -passes='default' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=NPM +; RUN: opt -mtriple=hexagon -disable-verify -debug-pass-manager \ +; RUN: -disable-output -passes='default' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=NPM +; RUN: opt -mtriple=hexagon -disable-verify -debug-pass-manager \ +; RUN: -disable-output -passes='default' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=NPM + +; Test TargetMachine::registerPassBuilderCallbacks +; NPM: Running pass: HexagonVectorLoopCarriedReusePass + +declare void @bar() local_unnamed_addr + +define void @foo(i32 %n) local_unnamed_addr { +entry: + br label %loop +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i32 %iv, 1 + tail call void @bar() + %cmp = icmp eq i32 %iv, %n + br i1 %cmp, label %exit, label %loop +exit: + ret void +} diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index c6c4191c14592..f01d33efe45ad 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -375,6 +375,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + if (TM) + TM->registerPassBuilderCallbacks(PB, DebugPM); + ModulePassManager MPM(DebugPM); if (VK > VK_NoVerifier) MPM.addPass(VerifierPass()); From d1d7fc98325d948bede85e6304c5ca93f79e050e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Sep 2020 12:36:28 -0700 Subject: [PATCH 203/544] [X86] Canonicalize (x > 1) ? x : 1 -> (x >= 1) ? x : 1 for sign and unsigned to enable the use of test instructions for the compare. This will be further canonicalized to a compare involving 0 which will enable the use of test instructions. Either using cmovg for signed for cmovne for unsigned. Fixes more case for PR47049 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +++++++++++++--- llvm/test/CodeGen/X86/cmov.ll | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d0fd1046fdeb7..8306e3a23f479 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40939,8 +40939,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // (x > 0) ? x : 0 -> (x >= 0) ? x : 0 // (x < -1) ? x : -1 -> (x <= -1) ? x : -1 // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates - // the need for an extra compare - // against zero. e.g. + // the need for an extra compare against zero. e.g. // (a - b) > 0 : (a - b) ? 0 -> (a - b) >= 0 : (a - b) ? 0 // subl %esi, %edi // testl %edi, %edi @@ -40950,17 +40949,28 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // xorl %eax, %eax // subl %esi, $edi // cmovsl %eax, %edi + // + // We can also canonicalize + // (x s> 1) ? x : 1 -> (x s>= 1) ? x : 1 -> (x s> 0) ? x : 1 + // (x u> 1) ? x : 1 -> (x u>= 1) ? x : 1 -> (x != 0) ? x : 1 + // This allows the use of a test instruction for the compare. if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse() && LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); - if ((CC == ISD::SETGT && isNullConstant(RHS)) || + if ((CC == ISD::SETGT && (isNullConstant(RHS) || isOneConstant(RHS))) || (CC == ISD::SETLT && isAllOnesConstant(RHS))) { ISD::CondCode NewCC = CC == ISD::SETGT ? ISD::SETGE : ISD::SETLE; Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), Cond.getOperand(1), NewCC); return DAG.getSelect(DL, VT, Cond, LHS, RHS); } + if (CC == ISD::SETUGT && isOneConstant(RHS)) { + ISD::CondCode NewCC = ISD::SETUGE; + Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), + Cond.getOperand(0), Cond.getOperand(1), NewCC); + return DAG.getSelect(DL, VT, Cond, LHS, RHS); + } } // Match VSELECTs into subs with unsigned saturation. diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 41ebc322b430a..b77d3c8e10e05 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -235,3 +235,27 @@ define i32 @pr47049_2(i32 %0) { %3 = select i1 %2, i32 %0, i32 -1 ret i32 %3 } + +define i32 @pr47049_3(i32 %0) { +; CHECK-LABEL: pr47049_3: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovgl %edi, %eax +; CHECK-NEXT: retq + %2 = icmp sgt i32 %0, 1 + %3 = select i1 %2, i32 %0, i32 1 + ret i32 %3 +} + +define i32 @pr47049_4(i32 %0) { +; CHECK-LABEL: pr47049_4: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovnel %edi, %eax +; CHECK-NEXT: retq + %2 = icmp ugt i32 %0, 1 + %3 = select i1 %2, i32 %0, i32 1 + ret i32 %3 +} From 8a1084a9486313e9f46e61ab69f80309c7050e1f Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 30 Sep 2020 22:58:07 +0200 Subject: [PATCH 204/544] [asan][test] XFAIL Posix/no_asan_gen_globals.c on Solaris `Posix/no_asan_gen_globals.c` currently `FAIL`s on Solaris: $ nm no_asan_gen_globals.c.tmp.exe | grep ___asan_gen_ 0809696a r .L___asan_gen_.1 0809a4cd r .L___asan_gen_.2 080908e2 r .L___asan_gen_.4 0809a4cd r .L___asan_gen_.5 0809a529 r .L___asan_gen_.7 0809a4cd r .L___asan_gen_.8 As detailed in Bug 47607, there are two factors here: - `clang` plays games by emitting some local labels into the symbol table. When instead one uses `-fno-integrated-as` to have `gas` create the object files, they don't land in the objects in the first place. - Unlike GNU `ld`, the Solaris `ld` doesn't support support `-X`/`--discard-locals` but instead relies on the assembler to follow its specification and not emit local labels. Therefore this patch `XFAIL`s the test on Solaris. Tested on `amd64-pc-solaris2.11` and `x86_64-pc-linux-gnu`. Differential Revision: https://reviews.llvm.org/D88218 --- compiler-rt/test/asan/TestCases/Posix/no_asan_gen_globals.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/test/asan/TestCases/Posix/no_asan_gen_globals.c b/compiler-rt/test/asan/TestCases/Posix/no_asan_gen_globals.c index 994f827974be9..c9a6d99aca23d 100644 --- a/compiler-rt/test/asan/TestCases/Posix/no_asan_gen_globals.c +++ b/compiler-rt/test/asan/TestCases/Posix/no_asan_gen_globals.c @@ -1,5 +1,7 @@ // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316 // XFAIL: android +// Bug 47607 +// XFAIL: solaris // Make sure ___asan_gen_* strings do not end up in the symbol table. // RUN: %clang_asan %s -o %t.exe From ae4c400e02fc3f7cff11cc332e6b107353b3e6a2 Mon Sep 17 00:00:00 2001 From: Hubert Tong Date: Wed, 30 Sep 2020 16:58:48 -0400 Subject: [PATCH 205/544] [NFC] Fix spacing in clang/test/Driver/aix-ld.c Fix one line with mismatch in indentation after afc277b0ed0d. --- clang/test/Driver/aix-ld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/aix-ld.c b/clang/test/Driver/aix-ld.c index 7ccbeff3b8b64..89959d851b935 100644 --- a/clang/test/Driver/aix-ld.c +++ b/clang/test/Driver/aix-ld.c @@ -403,7 +403,7 @@ // CHECK-LD32-NOSTDLIBXX-LCXX: "-L[[SYSROOT]]/usr/lib" // CHECK-LD32-NOSTDLIBXX-LCXX-NOT: "-lc++" // CHECK-LD32-NOSTDLIBXX-LCXX: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aix{{/|\\\\}}libclang_rt.builtins-powerpc.a" -// CHECK-LD32-NOSTDLIBXX-LCXX: "-lm" +// CHECK-LD32-NOSTDLIBXX-LCXX: "-lm" // CHECK-LD32-NOSTDLIBXX-LCXX: "-lc" // Check powerpc64-ibm-aix7.1.0.0, 64-bit. -nostdlib++. From 0c3c8f4ae69a619efd8dc088e2572db172d40547 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 12:43:21 -0700 Subject: [PATCH 206/544] [flang] Fix descriptor-based array data item I/O for list-directed CHARACTER & LOGICAL These types have to distinguish list-directed I/O from formatted I/O, and the subscript incrementation call was in the formatted branch of the if() rather than after the if(). Differential revision: https://reviews.llvm.org/D88606 --- flang/runtime/descriptor-io.h | 16 +++++++------- flang/unittests/Runtime/hello.cpp | 36 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h index ce0f39740c5fe..22552f27c1699 100644 --- a/flang/runtime/descriptor-io.h +++ b/flang/runtime/descriptor-io.h @@ -159,13 +159,13 @@ inline bool FormattedCharacterIO( } } } - if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) { - io.GetIoErrorHandler().Crash( - "FormattedCharacterIO: subscripts out of bounds"); - } } else { return false; } + if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) { + io.GetIoErrorHandler().Crash( + "FormattedCharacterIO: subscripts out of bounds"); + } } return true; } @@ -198,13 +198,13 @@ inline bool FormattedLogicalIO( } } } - if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) { - io.GetIoErrorHandler().Crash( - "FormattedLogicalIO: subscripts out of bounds"); - } } else { return false; } + if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) { + io.GetIoErrorHandler().Crash( + "FormattedLogicalIO: subscripts out of bounds"); + } } return true; } diff --git a/flang/unittests/Runtime/hello.cpp b/flang/unittests/Runtime/hello.cpp index c1daccae383ac..bcd3bb448318e 100644 --- a/flang/unittests/Runtime/hello.cpp +++ b/flang/unittests/Runtime/hello.cpp @@ -118,6 +118,41 @@ static void listInputTest() { } } +static void descrOutputTest() { + char buffer[9]; + // Formatted + const char *format{"(2A4)"}; + auto cookie{IONAME(BeginInternalFormattedOutput)( + buffer, sizeof buffer, format, std::strlen(format))}; + StaticDescriptor<1> staticDescriptor; + Descriptor &desc{staticDescriptor.descriptor()}; + SubscriptValue extent[]{2}; + char data[2][4]; + std::memcpy(data[0], "ABCD", 4); + std::memcpy(data[1], "EFGH", 4); + desc.Establish(TypeCode{CFI_type_char}, sizeof data[0], &data, 1, extent); + desc.Dump(); + desc.Check(); + IONAME(OutputDescriptor)(cookie, desc); + if (auto status{IONAME(EndIoStatement)(cookie)}) { + Fail() << "descrOutputTest: '" << format << "' failed, status " + << static_cast(status) << '\n'; + } else { + test("descrOutputTest(formatted)", "ABCDEFGH ", + std::string{buffer, sizeof buffer}); + } + // List-directed + cookie = IONAME(BeginInternalListOutput)(buffer, sizeof buffer); + IONAME(OutputDescriptor)(cookie, desc); + if (auto status{IONAME(EndIoStatement)(cookie)}) { + Fail() << "descrOutputTest: list-directed failed, status " + << static_cast(status) << '\n'; + } else { + test("descrOutputTest(list)", " ABCDEFGH", + std::string{buffer, sizeof buffer}); + } +} + static void realTest(const char *format, double x, const char *expect) { char buffer[800]; auto cookie{IONAME(BeginInternalFormattedOutput)( @@ -485,6 +520,7 @@ int main() { realInTest("(DC,F18.0)", " 12,5", 0x4029000000000000); listInputTest(); + descrOutputTest(); return EndTests(); } From 85fc5bf341395171e67490061f6fbc76b297b78d Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 30 Sep 2020 23:19:08 +0200 Subject: [PATCH 207/544] [clangd] Remove dead variable. NFC --- clang-tools-extra/clangd/URI.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang-tools-extra/clangd/URI.cpp b/clang-tools-extra/clangd/URI.cpp index f9e8fdc46fa7f..80e3a1017312d 100644 --- a/clang-tools-extra/clangd/URI.cpp +++ b/clang-tools-extra/clangd/URI.cpp @@ -111,7 +111,6 @@ bool shouldEscape(unsigned char C) { /// - Reserved characters always escaped with exceptions like '/'. /// - All other characters are escaped. void percentEncode(llvm::StringRef Content, std::string &Out) { - std::string Result; for (unsigned char C : Content) if (shouldEscape(C)) { Out.push_back('%'); From 49b3459930655d879b2dc190ff8fe11c38a8be5f Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 14 May 2020 14:02:36 -0700 Subject: [PATCH 208/544] [PDB] Merge types in parallel when using ghashing This makes type merging much faster (-24% on chrome.dll) when multiple threads are available, but it slightly increases the time to link (+10%) when /threads:1 is passed. With only one more thread, the new type merging is faster (-11%). The output PDB should be identical to what it was before this change. To give an idea, here is the /time output placed side by side: BEFORE | AFTER Input File Reading: 956 ms | 968 ms Code Layout: 258 ms | 190 ms Commit Output File: 6 ms | 7 ms PDB Emission (Cumulative): 6691 ms | 4253 ms Add Objects: 4341 ms | 2927 ms Type Merging: 2814 ms | 1269 ms -55%! Symbol Merging: 1509 ms | 1645 ms Publics Stream Layout: 111 ms | 112 ms TPI Stream Layout: 764 ms | 26 ms trivial Commit to Disk: 1322 ms | 1036 ms -300ms ----------------------------------------- -------- Total Link Time: 8416 ms 5882 ms -30% overall The main source of the additional overhead in the single-threaded case is the need to iterate all .debug$T sections up front to check which type records should go in the IPI stream. See fillIsItemIndexFromDebugT. With changes to the .debug$H section, we could pre-calculate this info and eliminate the need to do this walk up front. That should restore single-threaded performance back to what it was before this change. This change will cause LLD to be much more parallel than it used to, and for users who do multiple links in parallel, it could regress performance. However, when the user is only doing one link, it's a huge improvement. In the future, we can use NT worker threads to avoid oversaturating the machine with work, but for now, this is such an improvement for the single-link use case that I think we should land this as is. Algorithm ---------- Before this change, we essentially used a DenseMap to check if a type has already been seen, and if it hasn't been seen, insert it now and use the next available type index for it in the destination type stream. DenseMap does not support concurrent insertion, and even if it did, the linker must be deterministic: it cannot produce different PDBs by using different numbers of threads. The output type stream must be in the same order regardless of the order of hash table insertions. In order to create a hash table that supports concurrent insertion, the table cells must be small enough that they can be updated atomically. The algorithm I used for updating the table using linear probing is described in this paper, "Concurrent Hash Tables: Fast and General(?)!": https://dl.acm.org/doi/10.1145/3309206 The GHashCell in this change is essentially a pair of 32-bit integer indices: . The sourceIndex is the index of the TpiSource object, and it represents an input type stream. The typeIndex is the index of the type in the stream. Together, we have something like a ragged 2D array of ghashes, which can be looked up as: tpiSources[tpiSrcIndex]->ghashes[typeIndex] By using these side tables, we can omit the key data from the hash table, and keep the table cell small. There is a cost to this: resolving hash table collisions requires many more loads than simply looking at the key in the same cache line as the insertion position. However, most supported platforms should have a 64-bit CAS operation to update the cell atomically. To make the result of concurrent insertion deterministic, the cell payloads must have a priority function. Defining one is pretty straightforward: compare the two 32-bit numbers as a combined 64-bit number. This means that types coming from inputs earlier on the command line have a higher priority and are more likely to appear earlier in the final PDB type stream than types from an input appearing later on the link line. After table insertion, the non-empty cells in the table can be copied out of the main table and sorted by priority to determine the ordering of the final type index stream. At this point, item and type records must be separated, either by sorting or by splitting into two arrays, and I chose sorting. This is why the GHashCell must contain the isItem bit. Once the final PDB TPI stream ordering is known, we need to compute a mapping from source type index to PDB type index. To avoid starting over from scratch and looking up every type again by its ghash, we save the insertion position of every hash table insertion during the first insertion phase. Because the table does not support rehashing, the insertion position is stable. Using the array of insertion positions indexed by source type index, we can replace the source type indices in the ghash table cells with the PDB type indices. Once the table cells have been updated to contain PDB type indices, the mapping for each type source can be computed in parallel. Simply iterate the list of cell positions and replace them with the PDB type index, since the insertion positions are no longer needed. Once we have a source to destination type index mapping for every type source, there are no more data dependencies. We know which type records are "unique" (not duplicates), and what their final type indices will be. We can do the remapping in parallel, and accumulate type sizes and type hashes in parallel by type source. Lastly, TPI stream layout must be done serially. Accumulate all the type records, sizes, and hashes, and add them to the PDB. Differential Revision: https://reviews.llvm.org/D87805 --- lld/COFF/DebugTypes.cpp | 841 ++++++++++++++++-- lld/COFF/DebugTypes.h | 116 ++- lld/COFF/Driver.cpp | 2 +- lld/COFF/PDB.cpp | 179 ++-- lld/COFF/PDB.h | 6 + lld/COFF/TypeMerger.h | 30 +- lld/include/lld/Common/ErrorHandler.h | 7 + lld/test/COFF/pdb-global-hashes.test | 2 +- lld/test/COFF/pdb-procid-remapping.test | 8 +- lld/test/COFF/pdb-type-server-missing.yaml | 1 + lld/test/COFF/pdb-type-server-simple.test | 9 +- lld/test/COFF/precomp-link.test | 10 +- lld/test/COFF/s_udt.s | 2 + .../llvm/DebugInfo/CodeView/TypeHashing.h | 12 +- .../llvm/DebugInfo/CodeView/TypeIndex.h | 11 +- .../DebugInfo/PDB/Native/TpiStreamBuilder.h | 9 +- llvm/lib/DebugInfo/CodeView/RecordName.cpp | 8 +- .../DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 62 +- 18 files changed, 1079 insertions(+), 236 deletions(-) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 46959334e6676..7f83685240129 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -10,9 +10,12 @@ #include "Chunks.h" #include "Driver.h" #include "InputFiles.h" +#include "PDB.h" #include "TypeMerger.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Timer.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" @@ -20,7 +23,10 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" using namespace llvm; @@ -54,6 +60,10 @@ class TypeServerSource : public TpiSource { } Error mergeDebugT(TypeMerger *m) override; + + void loadGHashes() override; + void remapTpiWithGHashes(GHashState *g) override; + bool isDependency() const override { return true; } PDBInputFile *pdbInputFile = nullptr; @@ -73,22 +83,29 @@ class TypeServerIpiSource : public TpiSource { friend class TypeServerSource; - // IPI merging is handled in TypeServerSource::mergeDebugT, since it depends - // directly on type merging. + // All of the TpiSource methods are no-ops. The parent TypeServerSource + // handles both TPI and IPI. Error mergeDebugT(TypeMerger *m) override { return Error::success(); } - + void loadGHashes() override {} + void remapTpiWithGHashes(GHashState *g) override {} bool isDependency() const override { return true; } }; // This class represents the debug type stream of an OBJ file that depends on a // PDB type server (see TypeServerSource). class UseTypeServerSource : public TpiSource { + Expected getTypeServerSource(); + public: UseTypeServerSource(ObjFile *f, TypeServer2Record ts) : TpiSource(UsingPDB, f), typeServerDependency(ts) {} Error mergeDebugT(TypeMerger *m) override; + // No need to load ghashes from /Zi objects. + void loadGHashes() override {} + void remapTpiWithGHashes(GHashState *g) override; + // Information about the PDB type server dependency, that needs to be loaded // in before merging this OBJ. TypeServer2Record typeServerDependency; @@ -110,6 +127,8 @@ class PrecompSource : public TpiSource { toString(it.first->second->file) + " and " + toString(file) + ")"); } + void loadGHashes() override; + bool isDependency() const override { return true; } static std::map mappings; @@ -124,21 +143,47 @@ class UsePrecompSource : public TpiSource { Error mergeDebugT(TypeMerger *m) override; + void loadGHashes() override; + void remapTpiWithGHashes(GHashState *g) override; + +private: + Error mergeInPrecompHeaderObj(); + +public: // Information about the Precomp OBJ dependency, that needs to be loaded in // before merging this OBJ. PrecompRecord precompDependency; }; } // namespace -static std::vector gc; +std::vector TpiSource::instances; +ArrayRef TpiSource::dependencySources; +ArrayRef TpiSource::objectSources; -TpiSource::TpiSource(TpiKind k, ObjFile *f) : kind(k), file(f) { - gc.push_back(this); +TpiSource::TpiSource(TpiKind k, ObjFile *f) + : kind(k), tpiSrcIdx(instances.size()), file(f) { + instances.push_back(this); } // Vtable key method. TpiSource::~TpiSource() = default; +void TpiSource::sortDependencies() { + // Order dependencies first, but preserve the existing order. + std::vector deps; + std::vector objs; + for (TpiSource *s : instances) + (s->isDependency() ? deps : objs).push_back(s); + uint32_t numDeps = deps.size(); + uint32_t numObjs = objs.size(); + instances = std::move(deps); + instances.insert(instances.end(), objs.begin(), objs.end()); + for (uint32_t i = 0, e = instances.size(); i < e; ++i) + instances[i]->tpiSrcIdx = i; + dependencySources = makeArrayRef(instances.data(), numDeps); + objectSources = makeArrayRef(instances.data() + numDeps, numObjs); +} + TpiSource *lld::coff::makeTpiSource(ObjFile *file) { return make(TpiSource::Regular, file); } @@ -165,14 +210,68 @@ TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file, return make(file, precomp); } -void TpiSource::forEachSource(llvm::function_ref fn) { - for_each(gc, fn); -} - std::map TypeServerSource::mappings; std::map PrecompSource::mappings; +bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const { + if (ti.isSimple()) + return true; + + // This can be an item index or a type index. Choose the appropriate map. + ArrayRef tpiOrIpiMap = + (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap; + if (ti.toArrayIndex() >= tpiOrIpiMap.size()) + return false; + ti = tpiOrIpiMap[ti.toArrayIndex()]; + return true; +} + +void TpiSource::remapRecord(MutableArrayRef rec, + ArrayRef typeRefs) { + MutableArrayRef contents = rec.drop_front(sizeof(RecordPrefix)); + for (const TiReference &ref : typeRefs) { + unsigned byteSize = ref.Count * sizeof(TypeIndex); + if (contents.size() < ref.Offset + byteSize) + fatal("symbol record too short"); + + MutableArrayRef indices( + reinterpret_cast(contents.data() + ref.Offset), ref.Count); + for (TypeIndex &ti : indices) { + if (!remapTypeIndex(ti, ref.Kind)) { + if (config->verbose) { + uint16_t kind = + reinterpret_cast(rec.data())->RecordKind; + StringRef fname = file ? file->getName() : ""; + log("failed to remap type index in record of kind 0x" + + utohexstr(kind) + " in " + fname + " with bad " + + (ref.Kind == TiRefKind::IndexRef ? "item" : "type") + + " index 0x" + utohexstr(ti.getIndex())); + } + ti = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + } + } +} + +void TpiSource::remapTypesInTypeRecord(MutableArrayRef rec) { + // TODO: Handle errors similar to symbols. + SmallVector typeRefs; + discoverTypeIndices(CVType(rec), typeRefs); + remapRecord(rec, typeRefs); +} + +bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef rec) { + // Discover type index references in the record. Skip it if we don't + // know where they are. + SmallVector typeRefs; + if (!discoverTypeIndicesInSymbol(rec, typeRefs)) + return false; + remapRecord(rec, typeRefs); + return true; +} + // A COFF .debug$H section is currently a clang extension. This function checks // if a .debug$H section is in a format that we expect / understand, so that we // can ignore any sections which are coincidentally also named .debug$H but do @@ -203,7 +302,6 @@ static Optional> getDebugH(ObjFile *file) { static ArrayRef getHashesFromDebugH(ArrayRef debugH) { assert(canUseDebugH(debugH)); - debugH = debugH.drop_front(sizeof(object::debug_h_header)); uint32_t count = debugH.size() / sizeof(GloballyHashedType); return {reinterpret_cast(debugH.data()), count}; @@ -211,32 +309,17 @@ getHashesFromDebugH(ArrayRef debugH) { // Merge .debug$T for a generic object file. Error TpiSource::mergeDebugT(TypeMerger *m) { + assert(!config->debugGHashes && + "use remapTpiWithGHashes when ghash is enabled"); + CVTypeArray types; BinaryStreamReader reader(file->debugTypes, support::little); cantFail(reader.readArray(types, reader.getLength())); - if (config->debugGHashes) { - ArrayRef hashes; - std::vector ownedHashes; - if (Optional> debugH = getDebugH(file)) - hashes = getHashesFromDebugH(*debugH); - else { - ownedHashes = GloballyHashedType::hashTypes(types); - hashes = ownedHashes; - } - - if (auto err = mergeTypeAndIdRecords(m->globalIDTable, m->globalTypeTable, - indexMapStorage, types, hashes, - file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); - } else { - if (auto err = - mergeTypeAndIdRecords(m->idTable, m->typeTable, indexMapStorage, - types, file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); - } + if (auto err = mergeTypeAndIdRecords( + m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature)) + fatal("codeview::mergeTypeAndIdRecords failed: " + + toString(std::move(err))); // In an object, there is only one mapping for both types and items. tpiMap = indexMapStorage; @@ -267,6 +350,9 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { // Merge types from a type server PDB. Error TypeServerSource::mergeDebugT(TypeMerger *m) { + assert(!config->debugGHashes && + "use remapTpiWithGHashes when ghash is enabled"); + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); Expected expectedTpi = pdbFile.getPDBTpiStream(); if (auto e = expectedTpi.takeError()) @@ -279,45 +365,18 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { maybeIpi = &*expectedIpi; } - if (config->debugGHashes) { - // PDBs do not actually store global hashes, so when merging a type server - // PDB we have to synthesize global hashes. To do this, we first synthesize - // global hashes for the TPI stream, since it is independent, then we - // synthesize hashes for the IPI stream, using the hashes for the TPI stream - // as inputs. - auto tpiHashes = GloballyHashedType::hashTypes(expectedTpi->typeArray()); - Optional endPrecomp; - // Merge TPI first, because the IPI stream will reference type indices. - if (auto err = - mergeTypeRecords(m->globalTypeTable, indexMapStorage, - expectedTpi->typeArray(), tpiHashes, endPrecomp)) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - auto ipiHashes = - GloballyHashedType::hashIds(maybeIpi->typeArray(), tpiHashes); - if (auto err = - mergeIdRecords(m->globalIDTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray(), ipiHashes)) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; - } - } else { - // Merge TPI first, because the IPI stream will reference type indices. - if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, - expectedTpi->typeArray())) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray())) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; - } + // Merge TPI first, because the IPI stream will reference type indices. + if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, + expectedTpi->typeArray())) + fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); + tpiMap = indexMapStorage; + + // Merge IPI. + if (maybeIpi) { + if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, + maybeIpi->typeArray())) + fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); + ipiMap = ipiSrc->indexMapStorage; } if (config->showSummary) { @@ -337,7 +396,7 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { return Error::success(); } -Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { +Expected UseTypeServerSource::getTypeServerSource() { const codeview::GUID &tsId = typeServerDependency.getGuid(); StringRef tsPath = typeServerDependency.getName(); @@ -357,8 +416,15 @@ Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { tsSrc = (TypeServerSource *)pdb->debugTypesObj; } + return tsSrc; +} + +Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { + Expected tsSrc = getTypeServerSource(); + if (!tsSrc) + return tsSrc.takeError(); - pdb::PDBFile &pdbSession = tsSrc->pdbInputFile->session->getPDBFile(); + pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile(); auto expectedInfo = pdbSession.getPDBInfoStream(); if (!expectedInfo) return expectedInfo.takeError(); @@ -368,12 +434,12 @@ Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { // must match the GUID specified in the TypeServer2 record. if (expectedInfo->getGuid() != typeServerDependency.getGuid()) return createFileError( - tsPath, + typeServerDependency.getName(), make_error(pdb::pdb_error_code::signature_out_of_date)); // Reuse the type index map of the type server. - tpiMap = tsSrc->tpiMap; - ipiMap = tsSrc->ipiMap; + tpiMap = (*tsSrc)->tpiMap; + ipiMap = (*tsSrc)->ipiMap; return Error::success(); } @@ -399,26 +465,28 @@ static PrecompSource *findObjByName(StringRef fileNameOnly) { return nullptr; } -static Expected findPrecompMap(ObjFile *file, - PrecompRecord &pr) { +static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) { // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, // the paths embedded in the OBJs are in the Windows format. SmallString<128> prFileName = sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows); - PrecompSource *precomp; auto it = PrecompSource::mappings.find(pr.getSignature()); if (it != PrecompSource::mappings.end()) { - precomp = it->second; - } else { - // Lookup by name - precomp = findObjByName(prFileName); + return it->second; } + // Lookup by name + return findObjByName(prFileName); +} + +static Expected findPrecompMap(ObjFile *file, + PrecompRecord &pr) { + PrecompSource *precomp = findPrecompSource(file, pr); if (!precomp) return createFileError( - prFileName, + pr.getPrecompFilePath(), make_error(pdb::pdb_error_code::no_matching_pch)); if (pr.getSignature() != file->pchSignature) @@ -437,11 +505,8 @@ static Expected findPrecompMap(ObjFile *file, /// Merges a precompiled headers TPI map into the current TPI map. The /// precompiled headers object will also be loaded and remapped in the /// process. -static Error -mergeInPrecompHeaderObj(ObjFile *file, - SmallVectorImpl &indexMapStorage, - PrecompRecord &precomp) { - auto e = findPrecompMap(file, precomp); +Error UsePrecompSource::mergeInPrecompHeaderObj() { + auto e = findPrecompMap(file, precompDependency); if (!e) return e.takeError(); @@ -449,11 +514,17 @@ mergeInPrecompHeaderObj(ObjFile *file, if (precompSrc->tpiMap.empty()) return Error::success(); - assert(precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex); - assert(precomp.getTypesCount() <= precompSrc->tpiMap.size()); + assert(precompDependency.getStartTypeIndex() == + TypeIndex::FirstNonSimpleIndex); + assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size()); // Use the previously remapped index map from the precompiled headers. indexMapStorage.append(precompSrc->tpiMap.begin(), - precompSrc->tpiMap.begin() + precomp.getTypesCount()); + precompSrc->tpiMap.begin() + + precompDependency.getTypesCount()); + + if (config->debugGHashes) + funcIdToType = precompSrc->funcIdToType; // FIXME: Save copy + return Error::success(); } @@ -462,8 +533,7 @@ Error UsePrecompSource::mergeDebugT(TypeMerger *m) { // precompiled headers object (/Yc) first. Some type indices in the current // object are referencing data in the precompiled headers object, so we need // both to be loaded. - if (Error e = - mergeInPrecompHeaderObj(file, indexMapStorage, precompDependency)) + if (Error e = mergeInPrecompHeaderObj()) return e; return TpiSource::mergeDebugT(m); @@ -478,7 +548,586 @@ uint32_t TpiSource::countPrecompObjs() { } void TpiSource::clear() { - gc.clear(); + // Clean up any owned ghash allocations. + clearGHashes(); + TpiSource::instances.clear(); TypeServerSource::mappings.clear(); PrecompSource::mappings.clear(); } + +//===----------------------------------------------------------------------===// +// Parellel GHash type merging implementation. +//===----------------------------------------------------------------------===// + +void TpiSource::loadGHashes() { + if (Optional> debugH = getDebugH(file)) { + ghashes = getHashesFromDebugH(*debugH); + ownedGHashes = false; + } else { + CVTypeArray types; + BinaryStreamReader reader(file->debugTypes, support::little); + cantFail(reader.readArray(types, reader.getLength())); + assignGHashesFromVector(GloballyHashedType::hashTypes(types)); + } + + fillIsItemIndexFromDebugT(); +} + +// Copies ghashes from a vector into an array. These are long lived, so it's +// worth the time to copy these into an appropriately sized vector to reduce +// memory usage. +void TpiSource::assignGHashesFromVector( + std::vector &&hashVec) { + GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()]; + memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType)); + ghashes = makeArrayRef(hashes, hashVec.size()); + ownedGHashes = true; +} + +// Faster way to iterate type records. forEachTypeChecked is faster than +// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. +static void forEachTypeChecked(ArrayRef types, + function_ref fn) { + checkError( + forEachCodeViewRecord(types, [fn](const CVType &ty) -> Error { + fn(ty); + return Error::success(); + })); +} + +// Walk over file->debugTypes and fill in the isItemIndex bit vector. +// TODO: Store this information in .debug$H so that we don't have to recompute +// it. This is the main bottleneck slowing down parallel ghashing with one +// thread over single-threaded ghashing. +void TpiSource::fillIsItemIndexFromDebugT() { + uint32_t index = 0; + isItemIndex.resize(ghashes.size()); + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + if (isIdRecord(ty.kind())) + isItemIndex.set(index); + ++index; + }); +} + +void TpiSource::mergeTypeRecord(CVType ty) { + // Decide if the merged type goes into TPI or IPI. + bool isItem = isIdRecord(ty.kind()); + MergedInfo &merged = isItem ? mergedIpi : mergedTpi; + + // Copy the type into our mutable buffer. + assert(ty.length() <= codeview::MaxRecordLength); + size_t offset = merged.recs.size(); + size_t newSize = alignTo(ty.length(), 4); + merged.recs.resize(offset + newSize); + auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize); + memcpy(newRec.data(), ty.data().data(), newSize); + + // Fix up the record prefix and padding bytes if it required resizing. + if (newSize != ty.length()) { + reinterpret_cast(newRec.data())->RecordLen = newSize - 2; + for (size_t i = ty.length(); i < newSize; ++i) + newRec[i] = LF_PAD0 + (newSize - i); + } + + // Remap the type indices in the new record. + remapTypesInTypeRecord(newRec); + uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); + merged.recSizes.push_back(static_cast(newSize)); + merged.recHashes.push_back(pdbHash); +} + +void TpiSource::mergeUniqueTypeRecords(ArrayRef typeRecords, + TypeIndex beginIndex) { + // Re-sort the list of unique types by index. + if (kind == PDB) + assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end())); + else + llvm::sort(uniqueTypes); + + // Accumulate all the unique types into one buffer in mergedTypes. + uint32_t ghashIndex = 0; + auto nextUniqueIndex = uniqueTypes.begin(); + assert(mergedTpi.recs.empty()); + assert(mergedIpi.recs.empty()); + forEachTypeChecked(typeRecords, [&](const CVType &ty) { + if (nextUniqueIndex != uniqueTypes.end() && + *nextUniqueIndex == ghashIndex) { + mergeTypeRecord(ty); + ++nextUniqueIndex; + } + if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { + bool success = ty.length() >= 12; + TypeIndex srcFuncIdIndex = beginIndex + ghashIndex; + TypeIndex funcId = srcFuncIdIndex; + TypeIndex funcType; + if (success) { + funcType = *reinterpret_cast(&ty.data()[8]); + success &= remapTypeIndex(funcId, TiRefKind::IndexRef); + success &= remapTypeIndex(funcType, TiRefKind::TypeRef); + } + if (success) { + funcIdToType.insert({funcId, funcType}); + } else { + StringRef fname = file ? file->getName() : ""; + warn("corrupt LF_[M]FUNC_ID record 0x" + + utohexstr(srcFuncIdIndex.getIndex()) + " in " + fname); + } + } + ++ghashIndex; + }); + assert(nextUniqueIndex == uniqueTypes.end() && + "failed to merge all desired records"); + assert(uniqueTypes.size() == + mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && + "missing desired record"); +} + +void TpiSource::remapTpiWithGHashes(GHashState *g) { + assert(config->debugGHashes && "ghashes must be enabled"); + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + ipiMap = indexMapStorage; + mergeUniqueTypeRecords(file->debugTypes); + // TODO: Free all unneeded ghash resources now that we have a full index map. +} + +// PDBs do not actually store global hashes, so when merging a type server +// PDB we have to synthesize global hashes. To do this, we first synthesize +// global hashes for the TPI stream, since it is independent, then we +// synthesize hashes for the IPI stream, using the hashes for the TPI stream +// as inputs. +void TypeServerSource::loadGHashes() { + // Don't hash twice. + if (!ghashes.empty()) + return; + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); + + // Hash TPI stream. + Expected expectedTpi = pdbFile.getPDBTpiStream(); + if (auto e = expectedTpi.takeError()) + fatal("Type server does not have TPI stream: " + toString(std::move(e))); + assignGHashesFromVector( + GloballyHashedType::hashTypes(expectedTpi->typeArray())); + isItemIndex.resize(ghashes.size()); + + // Hash IPI stream, which depends on TPI ghashes. + if (!pdbFile.hasPDBIpiStream()) + return; + Expected expectedIpi = pdbFile.getPDBIpiStream(); + if (auto e = expectedIpi.takeError()) + fatal("error retreiving IPI stream: " + toString(std::move(e))); + ipiSrc->assignGHashesFromVector( + GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes)); + + // The IPI stream isItemIndex bitvector should be all ones. + ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size()); + ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size()); +} + +// Flatten discontiguous PDB type arrays to bytes so that we can use +// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from +// type servers is faster than iterating all object files compiled with /Z7 with +// CVTypeArray, which has high overheads due to the virtual interface of +// BinaryStream::readBytes. +static ArrayRef typeArrayToBytes(const CVTypeArray &types) { + BinaryStreamRef stream = types.getUnderlyingStream(); + ArrayRef debugTypes; + checkError(stream.readBytes(0, stream.getLength(), debugTypes)); + return debugTypes; +} + +// Merge types from a type server PDB. +void TypeServerSource::remapTpiWithGHashes(GHashState *g) { + assert(config->debugGHashes && "ghashes must be enabled"); + + // IPI merging depends on TPI, so do TPI first, then do IPI. No need to + // propagate errors, those should've been handled during ghash loading. + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); + pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream()); + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray())); + if (pdbFile.hasPDBIpiStream()) { + pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream()); + ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size()); + ipiSrc->fillMapFromGHashes(g, ipiSrc->indexMapStorage); + ipiMap = ipiSrc->indexMapStorage; + ipiSrc->tpiMap = tpiMap; + ipiSrc->ipiMap = ipiMap; + ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray())); + funcIdToType = ipiSrc->funcIdToType; // FIXME: Save copy + } +} + +void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { + // No remapping to do with /Zi objects. Simply use the index map from the type + // server. Errors should have been reported earlier. Symbols from this object + // will be ignored. + Expected maybeTsSrc = getTypeServerSource(); + if (!maybeTsSrc) { + typeMergingError = maybeTsSrc.takeError(); + return; + } + TypeServerSource *tsSrc = *maybeTsSrc; + tpiMap = tsSrc->tpiMap; + ipiMap = tsSrc->ipiMap; + funcIdToType = tsSrc->funcIdToType; // FIXME: Save copy +} + +void PrecompSource::loadGHashes() { + if (getDebugH(file)) { + warn("ignoring .debug$H section; pch with ghash is not implemented"); + } + + uint32_t ghashIdx = 0; + std::vector hashVec; + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + // Remember the index of the LF_ENDPRECOMP record so it can be excluded from + // the PDB. There must be an entry in the list of ghashes so that the type + // indexes of the following records in the /Yc PCH object line up. + if (ty.kind() == LF_ENDPRECOMP) + endPrecompGHashIdx = ghashIdx; + + hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); + isItemIndex.push_back(isIdRecord(ty.kind())); + ++ghashIdx; + }); + assignGHashesFromVector(std::move(hashVec)); +} + +void UsePrecompSource::loadGHashes() { + PrecompSource *pchSrc = findPrecompSource(file, precompDependency); + if (!pchSrc) + return; + + // To compute ghashes of a /Yu object file, we need to build on the the + // ghashes of the /Yc PCH object. After we are done hashing, discard the + // ghashes from the PCH source so we don't unnecessarily try to deduplicate + // them. + std::vector hashVec = + pchSrc->ghashes.take_front(precompDependency.getTypesCount()); + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); + isItemIndex.push_back(isIdRecord(ty.kind())); + }); + hashVec.erase(hashVec.begin(), + hashVec.begin() + precompDependency.getTypesCount()); + assignGHashesFromVector(std::move(hashVec)); +} + +void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { + // This object was compiled with /Yu, so process the corresponding + // precompiled headers object (/Yc) first. Some type indices in the current + // object are referencing data in the precompiled headers object, so we need + // both to be loaded. + if (Error e = mergeInPrecompHeaderObj()) { + typeMergingError = std::move(e); + return; + } + + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + ipiMap = indexMapStorage; + mergeUniqueTypeRecords(file->debugTypes, + TypeIndex(precompDependency.getStartTypeIndex() + + precompDependency.getTypesCount())); +} + +namespace { +/// A concurrent hash table for global type hashing. It is based on this paper: +/// Concurrent Hash Tables: Fast and General(?)! +/// https://dl.acm.org/doi/10.1145/3309206 +/// +/// This hash table is meant to be used in two phases: +/// 1. concurrent insertions +/// 2. concurrent reads +/// It does not support lookup, deletion, or rehashing. It uses linear probing. +/// +/// The paper describes storing a key-value pair in two machine words. +/// Generally, the values stored in this map are type indices, and we can use +/// those values to recover the ghash key from a side table. This allows us to +/// shrink the table entries further at the cost of some loads, and sidesteps +/// the need for a 128 bit atomic compare-and-swap operation. +/// +/// During insertion, a priority function is used to decide which insertion +/// should be preferred. This ensures that the output is deterministic. For +/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred. +/// +class GHashCell; +struct GHashTable { + GHashCell *table = nullptr; + uint32_t tableSize = 0; + + GHashTable() = default; + ~GHashTable(); + + /// Initialize the table with the given size. Because the table cannot be + /// resized, the initial size of the table must be large enough to contain all + /// inputs, or insertion may not be able to find an empty cell. + void init(uint32_t newTableSize); + + /// Insert the cell with the given ghash into the table. Return the insertion + /// position in the table. It is safe for the caller to store the insertion + /// position because the table cannot be resized. + uint32_t insert(GloballyHashedType ghash, GHashCell newCell); +}; + +/// A ghash table cell for deduplicating types from TpiSources. +class GHashCell { + uint64_t data = 0; + +public: + GHashCell() = default; + + // Construct data most to least significant so that sorting works well: + // - isItem + // - tpiSrcIdx + // - ghashIdx + // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a + // non-zero representation. + GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx) + : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) | + ghashIdx) { + assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure"); + assert(ghashIdx == getGHashIdx() && "round trip failure"); + } + + explicit GHashCell(uint64_t data) : data(data) {} + + // The empty cell is all zeros. + bool isEmpty() const { return data == 0ULL; } + + /// Extract the tpiSrcIdx. + uint32_t getTpiSrcIdx() const { + return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1; + } + + /// Extract the index into the ghash array of the TpiSource. + uint32_t getGHashIdx() const { return (uint32_t)data; } + + bool isItem() const { return data & (1ULL << 63U); } + + /// Get the ghash key for this cell. + GloballyHashedType getGHash() const { + return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()]; + } + + /// The priority function for the cell. The data is stored such that lower + /// tpiSrcIdx and ghashIdx values are preferred, which means that type record + /// from earlier sources are more likely to prevail. + friend inline bool operator<(const GHashCell &l, const GHashCell &r) { + return l.data < r.data; + } +}; +} // namespace + +namespace lld { +namespace coff { +/// This type is just a wrapper around GHashTable with external linkage so it +/// can be used from a header. +struct GHashState { + GHashTable table; +}; +} // namespace coff +} // namespace lld + +GHashTable::~GHashTable() { delete[] table; } + +void GHashTable::init(uint32_t newTableSize) { + table = new GHashCell[newTableSize]; + memset(table, 0, newTableSize * sizeof(GHashCell)); + tableSize = newTableSize; +} + +uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) { + assert(!newCell.isEmpty() && "cannot insert empty cell value"); + + // FIXME: The low bytes of SHA1 have low entropy for short records, which + // type records are. Swap the byte order for better entropy. A better ghash + // won't need this. + uint32_t startIdx = + ByteSwap_64(*reinterpret_cast(&ghash)) % tableSize; + + // Do a linear probe starting at startIdx. + uint32_t idx = startIdx; + while (true) { + // Run a compare and swap loop. There are four cases: + // - cell is empty: CAS into place and return + // - cell has matching key, earlier priority: do nothing, return + // - cell has matching key, later priority: CAS into place and return + // - cell has non-matching key: hash collision, probe next cell + auto *cellPtr = reinterpret_cast *>(&table[idx]); + GHashCell oldCell(cellPtr->load()); + while (oldCell.isEmpty() || oldCell.getGHash() == ghash) { + // Check if there is an existing ghash entry with a higher priority + // (earlier ordering). If so, this is a duplicate, we are done. + if (!oldCell.isEmpty() && oldCell < newCell) + return idx; + // Either the cell is empty, or our value is higher priority. Try to + // compare and swap. If it succeeds, we are done. + if (cellPtr->compare_exchange_weak(oldCell, newCell)) + return idx; + // If the CAS failed, check this cell again. + } + + // Advance the probe. Wrap around to the beginning if we run off the end. + ++idx; + idx = idx == tableSize ? 0 : idx; + if (idx == startIdx) { + // If this becomes an issue, we could mark failure and rehash from the + // beginning with a bigger table. There is no difference between rehashing + // internally and starting over. + report_fatal_error("ghash table is full"); + } + } + llvm_unreachable("left infloop"); +} + +TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc) + : typeTable(alloc), idTable(alloc) {} + +TypeMerger::~TypeMerger() = default; + +void TypeMerger::mergeTypesWithGHash() { + // Load ghashes. Do type servers and PCH objects first. + { + ScopedTimer t1(loadGHashTimer); + parallelForEach(TpiSource::dependencySources, + [&](TpiSource *source) { source->loadGHashes(); }); + parallelForEach(TpiSource::objectSources, + [&](TpiSource *source) { source->loadGHashes(); }); + } + + ScopedTimer t2(mergeGHashTimer); + GHashState ghashState; + + // Estimate the size of hash table needed to deduplicate ghashes. This *must* + // be larger than the number of unique types, or hash table insertion may not + // be able to find a vacant slot. Summing the input types guarantees this, but + // it is a gross overestimate. The table size could be reduced to save memory, + // but it would require implementing rehashing, and this table is generally + // small compared to total memory usage, at eight bytes per input type record, + // and most input type records are larger than eight bytes. + size_t tableSize = 0; + for (TpiSource *source : TpiSource::instances) + tableSize += source->ghashes.size(); + + // Cap the table size so that we can use 32-bit cell indices. Type indices are + // also 32-bit, so this is an inherent PDB file format limit anyway. + tableSize = std::min(size_t(INT32_MAX), tableSize); + ghashState.table.init(static_cast(tableSize)); + + // Insert ghashes in parallel. During concurrent insertion, we cannot observe + // the contents of the hash table cell, but we can remember the insertion + // position. Because the table does not rehash, the position will not change + // under insertion. After insertion is done, the value of the cell can be read + // to retreive the final PDB type index. + parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) { + TpiSource *source = TpiSource::instances[tpiSrcIdx]; + source->indexMapStorage.resize(source->ghashes.size()); + for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) { + if (source->shouldOmitFromPdb(i)) { + source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + GloballyHashedType ghash = source->ghashes[i]; + bool isItem = source->isItemIndex.test(i); + uint32_t cellIdx = + ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i)); + + // Store the ghash cell index as a type index in indexMapStorage. Later + // we will replace it with the PDB type index. + source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx); + } + }); + + // Collect all non-empty cells and sort them. This will implicitly assign + // destination type indices, and partition the entries into type records and + // item records. It arranges types in this order: + // - type records + // - source 0, type 0... + // - source 1, type 1... + // - item records + // - source 0, type 1... + // - source 1, type 0... + std::vector entries; + for (const GHashCell &cell : + makeArrayRef(ghashState.table.table, tableSize)) { + if (!cell.isEmpty()) + entries.push_back(cell); + } + parallelSort(entries, std::less()); + log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n", + double(entries.size()) / tableSize, entries.size(), tableSize)); + + // Find out how many type and item indices there are. + auto mid = + std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0)); + assert((mid == entries.end() || mid->isItem()) && + (mid == entries.begin() || !std::prev(mid)->isItem()) && + "midpoint is not midpoint"); + uint32_t numTypes = std::distance(entries.begin(), mid); + uint32_t numItems = std::distance(mid, entries.end()); + log("Tpi record count: " + Twine(numTypes)); + log("Ipi record count: " + Twine(numItems)); + + // Make a list of the "unique" type records to merge for each tpi source. Type + // merging will skip indices not on this list. Store the destination PDB type + // index for these unique types in the tpiMap for each source. The entries for + // non-unique types will be filled in prior to type merging. + for (uint32_t i = 0, e = entries.size(); i < e; ++i) { + auto &cell = entries[i]; + uint32_t tpiSrcIdx = cell.getTpiSrcIdx(); + TpiSource *source = TpiSource::instances[tpiSrcIdx]; + source->uniqueTypes.push_back(cell.getGHashIdx()); + + // Update the ghash table to store the destination PDB type index in the + // table. + uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes; + uint32_t ghashCellIndex = + source->indexMapStorage[cell.getGHashIdx()].toArrayIndex(); + ghashState.table.table[ghashCellIndex] = + GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex); + } + + // In parallel, remap all types. + for_each(TpiSource::dependencySources, [&](TpiSource *source) { + source->remapTpiWithGHashes(&ghashState); + }); + parallelForEach(TpiSource::objectSources, [&](TpiSource *source) { + source->remapTpiWithGHashes(&ghashState); + }); + + TpiSource::clearGHashes(); +} + +/// Given the index into the ghash table for a particular type, return the type +/// index for that type in the output PDB. +static TypeIndex loadPdbTypeIndexFromCell(GHashState *g, + uint32_t ghashCellIdx) { + GHashCell cell = g->table.table[ghashCellIdx]; + return TypeIndex::fromArrayIndex(cell.getGHashIdx()); +} + +// Fill in a TPI or IPI index map using ghashes. For each source type, use its +// ghash to lookup its final type index in the PDB, and store that in the map. +void TpiSource::fillMapFromGHashes(GHashState *g, + SmallVectorImpl &mapToFill) { + for (size_t i = 0, e = ghashes.size(); i < e; ++i) { + TypeIndex fakeCellIndex = indexMapStorage[i]; + if (fakeCellIndex.isSimple()) + mapToFill[i] = fakeCellIndex; + else + mapToFill[i] = loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex()); + } +} + +void TpiSource::clearGHashes() { + for (TpiSource *src : TpiSource::instances) { + if (src->ownedGHashes) + delete[] src->ghashes.data(); + src->ghashes = {}; + src->isItemIndex.clear(); + src->uniqueTypes.clear(); + } +} diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h index f97c0f7617445..17368244e5898 100644 --- a/lld/COFF/DebugTypes.h +++ b/lld/COFF/DebugTypes.h @@ -10,32 +10,37 @@ #define LLD_COFF_DEBUGTYPES_H #include "lld/Common/LLVM.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { namespace codeview { -class PrecompRecord; -class TypeServer2Record; +struct GloballyHashedType; } // namespace codeview namespace pdb { class NativeSession; +class TpiStream; } } // namespace llvm namespace lld { namespace coff { +using llvm::codeview::GloballyHashedType; using llvm::codeview::TypeIndex; class ObjFile; class PDBInputFile; class TypeMerger; +struct GHashState; class TpiSource { public: - enum TpiKind { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; + enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; TpiSource(TpiKind k, ObjFile *f); virtual ~TpiSource(); @@ -53,21 +58,97 @@ class TpiSource { /// caller-provided ObjectIndexMap. virtual Error mergeDebugT(TypeMerger *m); + /// Load global hashes, either by hashing types directly, or by loading them + /// from LLVM's .debug$H section. + virtual void loadGHashes(); + + /// Use global hashes to merge type information. + virtual void remapTpiWithGHashes(GHashState *g); + + // Remap a type index in place. + bool remapTypeIndex(TypeIndex &ti, llvm::codeview::TiRefKind refKind) const; + +protected: + void remapRecord(MutableArrayRef rec, + ArrayRef typeRefs); + + void mergeTypeRecord(llvm::codeview::CVType ty); + + // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex + // of the first record in this source, typically 0x1000. When PCHs are + // involved, it may start higher. + void mergeUniqueTypeRecords( + ArrayRef debugTypes, + TypeIndex beginIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex)); + + // Use the ghash table to construct a map from source type index to + // destination PDB type index. Usable for either TPI or IPI. + void fillMapFromGHashes(GHashState *m, + llvm::SmallVectorImpl &indexMap); + + // Copies ghashes from a vector into an array. These are long lived, so it's + // worth the time to copy these into an appropriately sized vector to reduce + // memory usage. + void assignGHashesFromVector(std::vector &&hashVec); + + // Walk over file->debugTypes and fill in the isItemIndex bit vector. + void fillIsItemIndexFromDebugT(); + +public: + bool remapTypesInSymbolRecord(MutableArrayRef rec); + + void remapTypesInTypeRecord(MutableArrayRef rec); + /// Is this a dependent file that needs to be processed first, before other /// OBJs? virtual bool isDependency() const { return false; } - static void forEachSource(llvm::function_ref fn); + /// Returns true if this type record should be omitted from the PDB, even if + /// it is unique. This prevents a record from being added to the input ghash + /// table. + bool shouldOmitFromPdb(uint32_t ghashIdx) { + return ghashIdx == endPrecompGHashIdx; + } + + /// All sources of type information in the program. + static std::vector instances; + + /// Dependency type sources, such as type servers or PCH object files. These + /// must be processed before objects that rely on them. Set by + /// TpiSources::sortDependencies. + static ArrayRef dependencySources; + + /// Object file sources. These must be processed after dependencySources. + static ArrayRef objectSources; + + /// Sorts the dependencies and reassigns TpiSource indices. + static void sortDependencies(); static uint32_t countTypeServerPDBs(); static uint32_t countPrecompObjs(); + /// Free heap allocated ghashes. + static void clearGHashes(); + /// Clear global data structures for TpiSources. static void clear(); const TpiKind kind; + bool ownedGHashes = true; + uint32_t tpiSrcIdx = 0; + +protected: + /// The ghash index (zero based, not 0x1000-based) of the LF_ENDPRECOMP record + /// in this object, if one exists. This is the all ones value otherwise. It is + /// recorded here so that it can be omitted from the final ghash table. + uint32_t endPrecompGHashIdx = ~0U; + +public: ObjFile *file; + /// An error encountered during type merging, if any. + Error typeMergingError = Error::success(); + // Storage for tpiMap or ipiMap, depending on the kind of source. llvm::SmallVector indexMapStorage; @@ -76,6 +157,31 @@ class TpiSource { // objects. llvm::ArrayRef tpiMap; llvm::ArrayRef ipiMap; + + /// Array of global type hashes, indexed by TypeIndex. May be calculated on + /// demand, or present in input object files. + llvm::ArrayRef ghashes; + + /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function + /// type index here. Both indices are PDB indices, not object type indexes. + llvm::DenseMap funcIdToType; + + /// Indicates if a type record is an item index or a type index. + llvm::BitVector isItemIndex; + + /// A list of all "unique" type indices which must be merged into the final + /// PDB. GHash type deduplication produces this list, and it should be + /// considerably smaller than the input. + std::vector uniqueTypes; + + struct MergedInfo { + std::vector recs; + std::vector recSizes; + std::vector recHashes; + }; + + MergedInfo mergedTpi; + MergedInfo mergedIpi; }; TpiSource *makeTpiSource(ObjFile *file); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index fb496a1c106f2..56717de226c29 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -69,13 +69,13 @@ bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS, lld::stderrOS = &stderrOS; errorHandler().cleanupCallback = []() { + TpiSource::clear(); freeArena(); ObjFile::instances.clear(); PDBInputFile::instances.clear(); ImportFile::instances.clear(); BitcodeFile::instances.clear(); memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances)); - TpiSource::clear(); OutputSection::clear(); }; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index bfa7bd8148dfd..9cf5e42c0b6aa 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -66,7 +66,8 @@ using llvm::object::coff_section; static ExitOnError exitOnErr; static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); - +Timer lld::coff::loadGHashTimer("Global Type Hashing", totalPdbLinkTimer); +Timer lld::coff::mergeGHashTimer("GHash Type Merging", totalPdbLinkTimer); static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer); static Timer typeMergingTimer("Type Merging", addObjectsTimer); static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer); @@ -112,8 +113,6 @@ class PDBLinker { /// externally. void addDebug(TpiSource *source); - bool mergeTypeRecords(TpiSource *source); - void addDebugSymbols(TpiSource *source); void mergeSymbolRecords(TpiSource *source, @@ -250,43 +249,18 @@ static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder, }); } -static bool remapTypeIndex(TypeIndex &ti, ArrayRef typeIndexMap) { - if (ti.isSimple()) - return true; - if (ti.toArrayIndex() >= typeIndexMap.size()) - return false; - ti = typeIndexMap[ti.toArrayIndex()]; - return true; -} - -static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind, - MutableArrayRef recordBytes, - TpiSource *source, - ArrayRef typeRefs) { - MutableArrayRef contents = - recordBytes.drop_front(sizeof(RecordPrefix)); - for (const TiReference &ref : typeRefs) { - unsigned byteSize = ref.Count * sizeof(TypeIndex); - if (contents.size() < ref.Offset + byteSize) - fatal("symbol record too short"); - - // This can be an item index or a type index. Choose the appropriate map. - bool isItemIndex = ref.Kind == TiRefKind::IndexRef; - ArrayRef typeOrItemMap = - isItemIndex ? source->ipiMap : source->tpiMap; - - MutableArrayRef tIs( - reinterpret_cast(contents.data() + ref.Offset), ref.Count); - for (TypeIndex &ti : tIs) { - if (!remapTypeIndex(ti, typeOrItemMap)) { - log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " + - file->getName() + " with bad " + (isItemIndex ? "item" : "type") + - " index 0x" + utohexstr(ti.getIndex())); - ti = TypeIndex(SimpleTypeKind::NotTranslated); - continue; - } - } - } +static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) { + // Start the TPI or IPI stream header. + builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80); + builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80); + for_each(TpiSource::instances, [&](TpiSource *source) { + builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs, + source->mergedTpi.recSizes, + source->mergedTpi.recHashes); + builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs, + source->mergedIpi.recSizes, + source->mergedIpi.recHashes); + }); } static void @@ -329,7 +303,7 @@ static SymbolKind symbolKind(ArrayRef recordData) { /// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32 static void translateIdSymbols(MutableArrayRef &recordData, - TypeCollection &idTable) { + TypeMerger &tMerger, TpiSource *source) { RecordPrefix *prefix = reinterpret_cast(recordData.data()); SymbolKind kind = symbolKind(recordData); @@ -356,13 +330,25 @@ static void translateIdSymbols(MutableArrayRef &recordData, reinterpret_cast(content.data() + refs[0].Offset); // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in // the IPI stream, whose `FunctionType` member refers to the TPI stream. - // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and + // Note that LF_FUNC_ID and LF_MFUNC_ID have the same record layout, and // in both cases we just need the second type index. if (!ti->isSimple() && !ti->isNoneType()) { - CVType funcIdData = idTable.getType(*ti); - ArrayRef tiBuf = funcIdData.data().slice(8, 4); - assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record"); - *ti = *reinterpret_cast(tiBuf.data()); + if (config->debugGHashes) { + auto idToType = source->funcIdToType.find(*ti); + if (idToType == source->funcIdToType.end()) { + warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item " + "index {1:X} which is not a LF_[M]FUNC_ID record", + source->file->getName(), ti->getIndex())); + *ti = TypeIndex(SimpleTypeKind::NotTranslated); + } else { + *ti = idToType->second; + } + } else { + CVType funcIdData = tMerger.getIDTable().getType(*ti); + ArrayRef tiBuf = funcIdData.data().slice(8, 4); + assert(tiBuf.size() == 4 && "corrupt LF_[M]FUNC_ID record"); + *ti = *reinterpret_cast(tiBuf.data()); + } } kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32 @@ -561,22 +547,16 @@ void PDBLinker::mergeSymbolRecords(TpiSource *source, const_cast(sym.data().data()), sym.length()); } - // Discover type index references in the record. Skip it if we don't - // know where they are. - SmallVector typeRefs; - if (!discoverTypeIndicesInSymbol(sym, typeRefs)) { - log("ignoring unknown symbol record with kind 0x" + - utohexstr(sym.kind())); + // Re-map all the type index references. + if (!source->remapTypesInSymbolRecord(recordBytes)) { + log("error remapping types in symbol of kind 0x" + + utohexstr(sym.kind()) + ", ignoring"); return Error::success(); } - // Re-map all the type index references. - remapTypesInSymbolRecord(file, sym.kind(), recordBytes, source, - typeRefs); - // An object file may have S_xxx_ID symbols, but these get converted to // "real" symbols in a PDB. - translateIdSymbols(recordBytes, tMerger.getIDTable()); + translateIdSymbols(recordBytes, tMerger, source); sym = CVSymbol(recordBytes); // If this record refers to an offset in the object file's string table, @@ -748,11 +728,15 @@ void DebugSHandler::mergeInlineeLines( const DebugSubsectionRecord &inlineeSubsection) { DebugInlineeLinesSubsectionRef inlineeLines; exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData())); + if (!source) { + warn("ignoring inlinee lines section in file that lacks type information"); + return; + } // Remap type indices in inlinee line records in place. for (const InlineeSourceLine &line : inlineeLines) { TypeIndex &inlinee = *const_cast(&line.Header->Inlinee); - if (!remapTypeIndex(inlinee, source->ipiMap)) { + if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) { log("bad inlinee line record in " + file.getName() + " with bad inlinee index 0x" + utohexstr(inlinee.getIndex())); } @@ -827,20 +811,6 @@ static void warnUnusable(InputFile *f, Error e) { warn(msg); } -bool PDBLinker::mergeTypeRecords(TpiSource *source) { - ScopedTimer t(typeMergingTimer); - // Before we can process symbol substreams from .debug$S, we need to process - // type information, file checksums, and the string table. Add type info to - // the PDB first, so that we can get the map from object file type and item - // indices to PDB type and item indices. - if (Error e = source->mergeDebugT(&tMerger)) { - // If the .debug$T sections fail to merge, assume there is no debug info. - warnUnusable(source->file, std::move(e)); - return false; - } - return true; -} - // Allocate memory for a .debug$S / .debug$F section and relocate it. static ArrayRef relocateDebugChunk(SectionChunk &debugChunk) { uint8_t *buffer = bAlloc.Allocate(debugChunk.getSize()); @@ -920,9 +890,27 @@ static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) { } void PDBLinker::addDebug(TpiSource *source) { - // If type merging failed, ignore the symbols. - if (mergeTypeRecords(source)) - addDebugSymbols(source); + // Before we can process symbol substreams from .debug$S, we need to process + // type information, file checksums, and the string table. Add type info to + // the PDB first, so that we can get the map from object file type and item + // indices to PDB type and item indices. If we are using ghashes, types have + // already been merged. + if (!config->debugGHashes) { + ScopedTimer t(typeMergingTimer); + if (Error e = source->mergeDebugT(&tMerger)) { + // If type merging failed, ignore the symbols. + warnUnusable(source->file, std::move(e)); + return; + } + } else { + // If type merging failed, ignore the symbols. + if (source->typeMergingError) { + warnUnusable(source->file, std::move(source->typeMergingError)); + return; + } + } + + addDebugSymbols(source); } static pdb::BulkPublic createPublic(Defined *def) { @@ -955,25 +943,31 @@ void PDBLinker::addObjectsToPDB() { for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(builder, obj); }); - // Merge dependencies - TpiSource::forEachSource([&](TpiSource *source) { - if (source->isDependency()) - addDebug(source); - }); + // Reorder dependency type sources to come first. + TpiSource::sortDependencies(); - // Merge regular and dependent OBJs - TpiSource::forEachSource([&](TpiSource *source) { - if (!source->isDependency()) - addDebug(source); - }); + // Merge type information from input files using global type hashing. + if (config->debugGHashes) + tMerger.mergeTypesWithGHash(); + + // Merge dependencies and then regular objects. + for_each(TpiSource::dependencySources, + [&](TpiSource *source) { addDebug(source); }); + for_each(TpiSource::objectSources, + [&](TpiSource *source) { addDebug(source); }); builder.getStringTableBuilder().setStrings(pdbStrTab); t1.stop(); // Construct TPI and IPI stream contents. ScopedTimer t2(tpiStreamLayoutTimer); - addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); - addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + // Collect all the merged types. + if (config->debugGHashes) { + addGHashTypeInfo(builder); + } else { + addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); + addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + } t2.stop(); } @@ -1014,8 +1008,8 @@ void PDBLinker::printStats() { "Input OBJ files (expanded from all cmd-line inputs)"); print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies"); print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies"); - print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(), - "Merged TPI records"); + print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records"); + print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records"); print(pdbStrTab.size(), "Output PDB strings"); print(globalSymbols, "Global symbol records"); print(moduleSymbols, "Module symbol records"); @@ -1067,8 +1061,11 @@ void PDBLinker::printStats() { } }; - printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); - printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); + if (!config->debugGHashes) { + // FIXME: Reimplement for ghash. + printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); + printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); + } message(buffer); } diff --git a/lld/COFF/PDB.h b/lld/COFF/PDB.h index 273609ea788c5..53506d40baef4 100644 --- a/lld/COFF/PDB.h +++ b/lld/COFF/PDB.h @@ -20,6 +20,8 @@ union DebugInfo; } namespace lld { +class Timer; + namespace coff { class OutputSection; class SectionChunk; @@ -32,6 +34,10 @@ void createPDB(SymbolTable *symtab, llvm::Optional> getFileLineCodeView(const SectionChunk *c, uint32_t addr); + +extern Timer loadGHashTimer; +extern Timer mergeGHashTimer; + } // namespace coff } // namespace lld diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h index d3184a7f18d74..be877cfda6e6b 100644 --- a/lld/COFF/TypeMerger.h +++ b/lld/COFF/TypeMerger.h @@ -10,45 +10,47 @@ #define LLD_COFF_TYPEMERGER_H #include "Config.h" -#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/Support/Allocator.h" +#include namespace lld { namespace coff { +using llvm::codeview::GloballyHashedType; +using llvm::codeview::TypeIndex; + +struct GHashState; + class TypeMerger { public: - TypeMerger(llvm::BumpPtrAllocator &alloc) - : typeTable(alloc), idTable(alloc), globalTypeTable(alloc), - globalIDTable(alloc) {} + TypeMerger(llvm::BumpPtrAllocator &alloc); + + ~TypeMerger(); /// Get the type table or the global type table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getTypeTable() { - if (config->debugGHashes) - return globalTypeTable; + assert(!config->debugGHashes); return typeTable; } /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getIDTable() { - if (config->debugGHashes) - return globalIDTable; + assert(!config->debugGHashes); return idTable; } + /// Use global hashes to eliminate duplicate types and identify unique type + /// indices in each TpiSource. + void mergeTypesWithGHash(); + /// Type records that will go into the PDB TPI stream. llvm::codeview::MergingTypeTableBuilder typeTable; /// Item records that will go into the PDB IPI stream. llvm::codeview::MergingTypeTableBuilder idTable; - /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH) - llvm::codeview::GlobalTypeTableBuilder globalTypeTable; - - /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH) - llvm::codeview::GlobalTypeTableBuilder globalIDTable; - // When showSummary is enabled, these are histograms of TPI and IPI records // keyed by type index. SmallVector tpiCounts; diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index 4ffc564e67e2f..79a5940823bdf 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -153,6 +153,13 @@ template T check(Expected e) { return std::move(*e); } +// Don't move from Expected wrappers around references. +template T &check(Expected e) { + if (!e) + fatal(llvm::toString(e.takeError())); + return *e; +} + template T check2(ErrorOr e, llvm::function_ref prefix) { if (auto ec = e.getError()) diff --git a/lld/test/COFF/pdb-global-hashes.test b/lld/test/COFF/pdb-global-hashes.test index 13039d42fe26a..430275b7a8848 100644 --- a/lld/test/COFF/pdb-global-hashes.test +++ b/lld/test/COFF/pdb-global-hashes.test @@ -2,7 +2,7 @@ RUN: yaml2obj %p/Inputs/pdb-hashes-1.yaml -o %t.1.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2.yaml -o %t.2.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2-missing.yaml -o %t.2.missing.obj RUN: lld-link /debug %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.nohash.pdb -RUN: lld-link /debug:ghash %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb +RUN: lld-link /debug:ghash -verbose %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb RUN: lld-link /debug:ghash %t.1.obj %t.2.missing.obj /entry:main /nodefaultlib /PDB:%t.mixed.pdb RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.nohash.pdb | FileCheck %s RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.hash.pdb | FileCheck %s diff --git a/lld/test/COFF/pdb-procid-remapping.test b/lld/test/COFF/pdb-procid-remapping.test index d7ea775be98e7..adc93585f2aac 100644 --- a/lld/test/COFF/pdb-procid-remapping.test +++ b/lld/test/COFF/pdb-procid-remapping.test @@ -1,8 +1,12 @@ -# RUN: yaml2obj %p/Inputs/pdb1.yaml -o %t1.obj -# RUN: yaml2obj %p/Inputs/pdb2.yaml -o %t2.obj +# RUN: yaml2obj < %p/Inputs/pdb1.yaml > %t1.obj +# RUN: yaml2obj < %p/Inputs/pdb2.yaml > %t2.obj + # RUN: lld-link /debug /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ # RUN: %t1.obj %t2.obj +# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s +# RUN: lld-link /debug /debug:ghash /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ +# RUN: %t1.obj %t2.obj # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s CHECK: Symbols diff --git a/lld/test/COFF/pdb-type-server-missing.yaml b/lld/test/COFF/pdb-type-server-missing.yaml index 1a8c9a05c3d9c..78ddc0e4adb28 100644 --- a/lld/test/COFF/pdb-type-server-missing.yaml +++ b/lld/test/COFF/pdb-type-server-missing.yaml @@ -5,6 +5,7 @@ # RUN: yaml2obj %s -o %t1.obj # RUN: yaml2obj %p/Inputs/pdb-type-server-missing-2.yaml -o %t2.obj # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN +# RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug:ghash -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 2>&1 | FileCheck %s -check-prefix=IGNORE -allow-empty # RUN: not lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /WX 2>&1 | FileCheck %s -check-prefix=ERR # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 /WX 2>&1 | FileCheck %s -check-prefix=IGNORE-ERR -allow-empty diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test index bcba6da28b690..b954712d9b6c3 100644 --- a/lld/test/COFF/pdb-type-server-simple.test +++ b/lld/test/COFF/pdb-type-server-simple.test @@ -20,7 +20,11 @@ RUN: rm -rf %t && mkdir -p %t && cd %t RUN: yaml2obj %S/Inputs/pdb-type-server-simple-a.yaml -o a.obj RUN: yaml2obj %S/Inputs/pdb-type-server-simple-b.yaml -o b.obj RUN: llvm-pdbutil yaml2pdb %S/Inputs/pdb-type-server-simple-ts.yaml -pdb ts.pdb -RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib /summary | FileCheck %s -check-prefix SUMMARY +RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib -summary | FileCheck %s -check-prefix SUMMARY +RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s + +Re-run with /DEBUG:GHASH +RUN: lld-link a.obj b.obj -entry:main -debug:ghash -out:t.exe -pdb:t.pdb -nodefaultlib -summary -verbose RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s @@ -101,7 +105,8 @@ SUMMARY-NEXT: ------------------------------------------------------------------ SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 1 PDB type server dependencies SUMMARY-NEXT: 0 Precomp OBJ dependencies -SUMMARY-NEXT: 25 Merged TPI records +SUMMARY-NEXT: 9 Merged TPI records +SUMMARY-NEXT: 16 Merged IPI records SUMMARY-NEXT: 3 Output PDB strings SUMMARY-NEXT: 4 Global symbol records SUMMARY-NEXT: 14 Module symbol records diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test index b0692ee8002f7..161ee88d27f5e 100644 --- a/lld/test/COFF/precomp-link.test +++ b/lld/test/COFF/precomp-link.test @@ -5,6 +5,7 @@ RUN: lld-link %S/Inputs/precomp.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b. RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE +RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE FIXME: The following RUN line should fail, regardless of whether debug info is enabled or not. Normally this would result in an error due to missing _PchSym_ @@ -52,12 +53,19 @@ CHECK-NOT: LF_PRECOMP CHECK-NOT: LF_ENDPRECOMP +Re-run with ghash. Eventually, perhaps this will be the default. + +RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf /summary | FileCheck %s -check-prefix SUMMARY +RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s + + SUMMARY: Summary SUMMARY-NEXT: -------------------------------------------------------------------------------- SUMMARY-NEXT: 3 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 0 PDB type server dependencies SUMMARY-NEXT: 1 Precomp OBJ dependencies -SUMMARY-NEXT: 1044 Merged TPI records +SUMMARY-NEXT: 874 Merged TPI records +SUMMARY-NEXT: 170 Merged IPI records SUMMARY-NEXT: 5 Output PDB strings SUMMARY-NEXT: 167 Global symbol records SUMMARY-NEXT: 20 Module symbol records diff --git a/lld/test/COFF/s_udt.s b/lld/test/COFF/s_udt.s index 63e4099709575..373394334b19c 100644 --- a/lld/test/COFF/s_udt.s +++ b/lld/test/COFF/s_udt.s @@ -2,6 +2,8 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-windows-msvc < %s > %t.obj # RUN: lld-link /DEBUG:FULL /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe # RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s +# RUN: lld-link /DEBUG:FULL /debug:ghash /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe +# RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s # CHECK: Types (TPI Stream) # CHECK-NEXT: ============================================================ diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h index b0a16cccbff31..e6ade770457c2 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -86,6 +86,16 @@ struct GloballyHashedType { bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + friend inline bool operator==(const GloballyHashedType &L, + const GloballyHashedType &R) { + return L.Hash == R.Hash; + } + + friend inline bool operator!=(const GloballyHashedType &L, + const GloballyHashedType &R) { + return !(L.Hash == R.Hash); + } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -206,7 +216,7 @@ template <> struct DenseMapInfo { static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS) { - return LHS.Hash == RHS.Hash; + return LHS == RHS; } }; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index b9e2562bfc2b1..bdc6cf46509bc 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -116,13 +116,22 @@ class TypeIndex { uint32_t toArrayIndex() const { assert(!isSimple()); - return getIndex() - FirstNonSimpleIndex; + return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex; } static TypeIndex fromArrayIndex(uint32_t Index) { return TypeIndex(Index + FirstNonSimpleIndex); } + static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) { + return TypeIndex((Index + FirstNonSimpleIndex) | + (IsItem ? DecoratedItemIdMask : 0)); + } + + TypeIndex removeDecoration() { + return TypeIndex(Index & ~DecoratedItemIdMask); + } + SimpleTypeKind getSimpleKind() const { assert(isSimple()); return static_cast(Index & SimpleKindMask); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index 72d98e9c2c4d1..9ef2ee6a93070 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -54,16 +54,20 @@ class TpiStreamBuilder { void setVersionHeader(PdbRaw_TpiVer Version); void addTypeRecord(ArrayRef Type, Optional Hash); + void addTypeRecords(ArrayRef Types, ArrayRef Sizes, + ArrayRef Hashes); Error finalizeMsfLayout(); - uint32_t getRecordCount() const { return TypeRecords.size(); } + uint32_t getRecordCount() const { return TypeRecordCount; } Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); private: + void updateTypeIndexOffsets(ArrayRef Sizes); + uint32_t calculateHashBufferSize() const; uint32_t calculateIndexOffsetSize() const; Error finalize(); @@ -71,10 +75,11 @@ class TpiStreamBuilder { msf::MSFBuilder &Msf; BumpPtrAllocator &Allocator; + uint32_t TypeRecordCount = 0; size_t TypeRecordBytes = 0; PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; - std::vector> TypeRecords; + std::vector> TypeRecBuffers; std::vector TypeHashes; std::vector TypeIndexOffsets; uint32_t HashStreamIndex = kInvalidStreamIndex; diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp index 47b5498181b7f..1ca899789bef2 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" @@ -77,9 +78,10 @@ Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { uint32_t Size = Indices.size(); Name = "("; for (uint32_t I = 0; I < Size; ++I) { - assert(Indices[I] < CurrentTypeIndex); - - Name.append(Types.getTypeName(Indices[I])); + if (Indices[I] < CurrentTypeIndex) + Name.append(Types.getTypeName(Indices[I])); + else + Name.append(""); if (I + 1 != Size) Name.append(", "); } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 51a1f0a544e3c..b5e7b03e6917f 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Error.h" #include #include +#include using namespace llvm; using namespace llvm::msf; @@ -41,39 +42,68 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { VerHeader = Version; } +void TpiStreamBuilder::updateTypeIndexOffsets(ArrayRef Sizes) { + // If we just crossed an 8KB threshold, add a type index offset. + for (uint16_t Size : Sizes) { + size_t NewSize = TypeRecordBytes + Size; + constexpr size_t EightKB = 8 * 1024; + if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecordCount == 0) { + TypeIndexOffsets.push_back( + {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + + TypeRecordCount), + ulittle32_t(TypeRecordBytes)}); + } + ++TypeRecordCount; + TypeRecordBytes = NewSize; + } +} + void TpiStreamBuilder::addTypeRecord(ArrayRef Record, Optional Hash) { - // If we just crossed an 8KB threshold, add a type index offset. assert(((Record.size() & 3) == 0) && "The type record's size is not a multiple of 4 bytes which will " "cause misalignment in the output TPI stream!"); - size_t NewSize = TypeRecordBytes + Record.size(); - constexpr size_t EightKB = 8 * 1024; - if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { - TypeIndexOffsets.push_back( - {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + - TypeRecords.size()), - ulittle32_t(TypeRecordBytes)}); - } - TypeRecordBytes = NewSize; + assert(Record.size() <= codeview::MaxRecordLength); + uint16_t OneSize = (uint16_t)Record.size(); + updateTypeIndexOffsets(makeArrayRef(&OneSize, 1)); - TypeRecords.push_back(Record); + TypeRecBuffers.push_back(Record); + // FIXME: Require it. if (Hash) TypeHashes.push_back(*Hash); } +void TpiStreamBuilder::addTypeRecords(ArrayRef Types, + ArrayRef Sizes, + ArrayRef Hashes) { + // Ignore empty type buffers. There should be no hashes or sizes in this case. + if (Types.empty()) { + assert(Sizes.empty() && Hashes.empty()); + return; + } + + assert(((Types.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + assert(Sizes.size() == Hashes.size() && "sizes and hashes should be in sync"); + assert(std::accumulate(Sizes.begin(), Sizes.end(), 0U) == Types.size() && + "sizes of type records should sum to the size of the types"); + updateTypeIndexOffsets(Sizes); + + TypeRecBuffers.push_back(Types); + TypeHashes.insert(TypeHashes.end(), Hashes.begin(), Hashes.end()); +} + Error TpiStreamBuilder::finalize() { if (Header) return Error::success(); TpiStreamHeader *H = Allocator.Allocate(); - uint32_t Count = TypeRecords.size(); - H->Version = VerHeader; H->HeaderSize = sizeof(TpiStreamHeader); H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; - H->TypeIndexEnd = H->TypeIndexBegin + Count; + H->TypeIndexEnd = H->TypeIndexBegin + TypeRecordCount; H->TypeRecordBytes = TypeRecordBytes; H->HashStreamIndex = HashStreamIndex; @@ -104,7 +134,7 @@ uint32_t TpiStreamBuilder::calculateSerializedLength() { } uint32_t TpiStreamBuilder::calculateHashBufferSize() const { - assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) && + assert((TypeRecordCount == TypeHashes.size() || TypeHashes.empty()) && "either all or no type records should have hashes"); return TypeHashes.size() * sizeof(ulittle32_t); } @@ -155,7 +185,7 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecords) { + for (auto Rec : TypeRecBuffers) { assert(!Rec.empty() && "Attempting to write an empty type record shifts " "all offsets in the TPI stream!"); assert(((Rec.size() & 3) == 0) && From 8d250ac3cd48d0f17f9314685a85e77895c05351 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 30 Sep 2020 14:55:32 -0700 Subject: [PATCH 209/544] Revert "[PDB] Merge types in parallel when using ghashing" This reverts commit 49b3459930655d879b2dc190ff8fe11c38a8be5f. --- lld/COFF/DebugTypes.cpp | 841 ++---------------- lld/COFF/DebugTypes.h | 116 +-- lld/COFF/Driver.cpp | 2 +- lld/COFF/PDB.cpp | 179 ++-- lld/COFF/PDB.h | 6 - lld/COFF/TypeMerger.h | 30 +- lld/include/lld/Common/ErrorHandler.h | 7 - lld/test/COFF/pdb-global-hashes.test | 2 +- lld/test/COFF/pdb-procid-remapping.test | 8 +- lld/test/COFF/pdb-type-server-missing.yaml | 1 - lld/test/COFF/pdb-type-server-simple.test | 9 +- lld/test/COFF/precomp-link.test | 10 +- lld/test/COFF/s_udt.s | 2 - .../llvm/DebugInfo/CodeView/TypeHashing.h | 12 +- .../llvm/DebugInfo/CodeView/TypeIndex.h | 11 +- .../DebugInfo/PDB/Native/TpiStreamBuilder.h | 9 +- llvm/lib/DebugInfo/CodeView/RecordName.cpp | 8 +- .../DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 62 +- 18 files changed, 236 insertions(+), 1079 deletions(-) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 7f83685240129..46959334e6676 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -10,12 +10,9 @@ #include "Chunks.h" #include "Driver.h" #include "InputFiles.h" -#include "PDB.h" #include "TypeMerger.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" -#include "lld/Common/Timer.h" -#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" @@ -23,10 +20,7 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" using namespace llvm; @@ -60,10 +54,6 @@ class TypeServerSource : public TpiSource { } Error mergeDebugT(TypeMerger *m) override; - - void loadGHashes() override; - void remapTpiWithGHashes(GHashState *g) override; - bool isDependency() const override { return true; } PDBInputFile *pdbInputFile = nullptr; @@ -83,29 +73,22 @@ class TypeServerIpiSource : public TpiSource { friend class TypeServerSource; - // All of the TpiSource methods are no-ops. The parent TypeServerSource - // handles both TPI and IPI. + // IPI merging is handled in TypeServerSource::mergeDebugT, since it depends + // directly on type merging. Error mergeDebugT(TypeMerger *m) override { return Error::success(); } - void loadGHashes() override {} - void remapTpiWithGHashes(GHashState *g) override {} + bool isDependency() const override { return true; } }; // This class represents the debug type stream of an OBJ file that depends on a // PDB type server (see TypeServerSource). class UseTypeServerSource : public TpiSource { - Expected getTypeServerSource(); - public: UseTypeServerSource(ObjFile *f, TypeServer2Record ts) : TpiSource(UsingPDB, f), typeServerDependency(ts) {} Error mergeDebugT(TypeMerger *m) override; - // No need to load ghashes from /Zi objects. - void loadGHashes() override {} - void remapTpiWithGHashes(GHashState *g) override; - // Information about the PDB type server dependency, that needs to be loaded // in before merging this OBJ. TypeServer2Record typeServerDependency; @@ -127,8 +110,6 @@ class PrecompSource : public TpiSource { toString(it.first->second->file) + " and " + toString(file) + ")"); } - void loadGHashes() override; - bool isDependency() const override { return true; } static std::map mappings; @@ -143,47 +124,21 @@ class UsePrecompSource : public TpiSource { Error mergeDebugT(TypeMerger *m) override; - void loadGHashes() override; - void remapTpiWithGHashes(GHashState *g) override; - -private: - Error mergeInPrecompHeaderObj(); - -public: // Information about the Precomp OBJ dependency, that needs to be loaded in // before merging this OBJ. PrecompRecord precompDependency; }; } // namespace -std::vector TpiSource::instances; -ArrayRef TpiSource::dependencySources; -ArrayRef TpiSource::objectSources; +static std::vector gc; -TpiSource::TpiSource(TpiKind k, ObjFile *f) - : kind(k), tpiSrcIdx(instances.size()), file(f) { - instances.push_back(this); +TpiSource::TpiSource(TpiKind k, ObjFile *f) : kind(k), file(f) { + gc.push_back(this); } // Vtable key method. TpiSource::~TpiSource() = default; -void TpiSource::sortDependencies() { - // Order dependencies first, but preserve the existing order. - std::vector deps; - std::vector objs; - for (TpiSource *s : instances) - (s->isDependency() ? deps : objs).push_back(s); - uint32_t numDeps = deps.size(); - uint32_t numObjs = objs.size(); - instances = std::move(deps); - instances.insert(instances.end(), objs.begin(), objs.end()); - for (uint32_t i = 0, e = instances.size(); i < e; ++i) - instances[i]->tpiSrcIdx = i; - dependencySources = makeArrayRef(instances.data(), numDeps); - objectSources = makeArrayRef(instances.data() + numDeps, numObjs); -} - TpiSource *lld::coff::makeTpiSource(ObjFile *file) { return make(TpiSource::Regular, file); } @@ -210,67 +165,13 @@ TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file, return make(file, precomp); } -std::map TypeServerSource::mappings; - -std::map PrecompSource::mappings; - -bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const { - if (ti.isSimple()) - return true; - - // This can be an item index or a type index. Choose the appropriate map. - ArrayRef tpiOrIpiMap = - (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap; - if (ti.toArrayIndex() >= tpiOrIpiMap.size()) - return false; - ti = tpiOrIpiMap[ti.toArrayIndex()]; - return true; -} - -void TpiSource::remapRecord(MutableArrayRef rec, - ArrayRef typeRefs) { - MutableArrayRef contents = rec.drop_front(sizeof(RecordPrefix)); - for (const TiReference &ref : typeRefs) { - unsigned byteSize = ref.Count * sizeof(TypeIndex); - if (contents.size() < ref.Offset + byteSize) - fatal("symbol record too short"); - - MutableArrayRef indices( - reinterpret_cast(contents.data() + ref.Offset), ref.Count); - for (TypeIndex &ti : indices) { - if (!remapTypeIndex(ti, ref.Kind)) { - if (config->verbose) { - uint16_t kind = - reinterpret_cast(rec.data())->RecordKind; - StringRef fname = file ? file->getName() : ""; - log("failed to remap type index in record of kind 0x" + - utohexstr(kind) + " in " + fname + " with bad " + - (ref.Kind == TiRefKind::IndexRef ? "item" : "type") + - " index 0x" + utohexstr(ti.getIndex())); - } - ti = TypeIndex(SimpleTypeKind::NotTranslated); - continue; - } - } - } +void TpiSource::forEachSource(llvm::function_ref fn) { + for_each(gc, fn); } -void TpiSource::remapTypesInTypeRecord(MutableArrayRef rec) { - // TODO: Handle errors similar to symbols. - SmallVector typeRefs; - discoverTypeIndices(CVType(rec), typeRefs); - remapRecord(rec, typeRefs); -} +std::map TypeServerSource::mappings; -bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef rec) { - // Discover type index references in the record. Skip it if we don't - // know where they are. - SmallVector typeRefs; - if (!discoverTypeIndicesInSymbol(rec, typeRefs)) - return false; - remapRecord(rec, typeRefs); - return true; -} +std::map PrecompSource::mappings; // A COFF .debug$H section is currently a clang extension. This function checks // if a .debug$H section is in a format that we expect / understand, so that we @@ -302,6 +203,7 @@ static Optional> getDebugH(ObjFile *file) { static ArrayRef getHashesFromDebugH(ArrayRef debugH) { assert(canUseDebugH(debugH)); + debugH = debugH.drop_front(sizeof(object::debug_h_header)); uint32_t count = debugH.size() / sizeof(GloballyHashedType); return {reinterpret_cast(debugH.data()), count}; @@ -309,17 +211,32 @@ getHashesFromDebugH(ArrayRef debugH) { // Merge .debug$T for a generic object file. Error TpiSource::mergeDebugT(TypeMerger *m) { - assert(!config->debugGHashes && - "use remapTpiWithGHashes when ghash is enabled"); - CVTypeArray types; BinaryStreamReader reader(file->debugTypes, support::little); cantFail(reader.readArray(types, reader.getLength())); - if (auto err = mergeTypeAndIdRecords( - m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); + if (config->debugGHashes) { + ArrayRef hashes; + std::vector ownedHashes; + if (Optional> debugH = getDebugH(file)) + hashes = getHashesFromDebugH(*debugH); + else { + ownedHashes = GloballyHashedType::hashTypes(types); + hashes = ownedHashes; + } + + if (auto err = mergeTypeAndIdRecords(m->globalIDTable, m->globalTypeTable, + indexMapStorage, types, hashes, + file->pchSignature)) + fatal("codeview::mergeTypeAndIdRecords failed: " + + toString(std::move(err))); + } else { + if (auto err = + mergeTypeAndIdRecords(m->idTable, m->typeTable, indexMapStorage, + types, file->pchSignature)) + fatal("codeview::mergeTypeAndIdRecords failed: " + + toString(std::move(err))); + } // In an object, there is only one mapping for both types and items. tpiMap = indexMapStorage; @@ -350,9 +267,6 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { // Merge types from a type server PDB. Error TypeServerSource::mergeDebugT(TypeMerger *m) { - assert(!config->debugGHashes && - "use remapTpiWithGHashes when ghash is enabled"); - pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); Expected expectedTpi = pdbFile.getPDBTpiStream(); if (auto e = expectedTpi.takeError()) @@ -365,18 +279,45 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { maybeIpi = &*expectedIpi; } - // Merge TPI first, because the IPI stream will reference type indices. - if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, - expectedTpi->typeArray())) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray())) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; + if (config->debugGHashes) { + // PDBs do not actually store global hashes, so when merging a type server + // PDB we have to synthesize global hashes. To do this, we first synthesize + // global hashes for the TPI stream, since it is independent, then we + // synthesize hashes for the IPI stream, using the hashes for the TPI stream + // as inputs. + auto tpiHashes = GloballyHashedType::hashTypes(expectedTpi->typeArray()); + Optional endPrecomp; + // Merge TPI first, because the IPI stream will reference type indices. + if (auto err = + mergeTypeRecords(m->globalTypeTable, indexMapStorage, + expectedTpi->typeArray(), tpiHashes, endPrecomp)) + fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); + tpiMap = indexMapStorage; + + // Merge IPI. + if (maybeIpi) { + auto ipiHashes = + GloballyHashedType::hashIds(maybeIpi->typeArray(), tpiHashes); + if (auto err = + mergeIdRecords(m->globalIDTable, tpiMap, ipiSrc->indexMapStorage, + maybeIpi->typeArray(), ipiHashes)) + fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); + ipiMap = ipiSrc->indexMapStorage; + } + } else { + // Merge TPI first, because the IPI stream will reference type indices. + if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, + expectedTpi->typeArray())) + fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); + tpiMap = indexMapStorage; + + // Merge IPI. + if (maybeIpi) { + if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, + maybeIpi->typeArray())) + fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); + ipiMap = ipiSrc->indexMapStorage; + } } if (config->showSummary) { @@ -396,7 +337,7 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { return Error::success(); } -Expected UseTypeServerSource::getTypeServerSource() { +Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { const codeview::GUID &tsId = typeServerDependency.getGuid(); StringRef tsPath = typeServerDependency.getName(); @@ -416,15 +357,8 @@ Expected UseTypeServerSource::getTypeServerSource() { tsSrc = (TypeServerSource *)pdb->debugTypesObj; } - return tsSrc; -} - -Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { - Expected tsSrc = getTypeServerSource(); - if (!tsSrc) - return tsSrc.takeError(); - pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile(); + pdb::PDBFile &pdbSession = tsSrc->pdbInputFile->session->getPDBFile(); auto expectedInfo = pdbSession.getPDBInfoStream(); if (!expectedInfo) return expectedInfo.takeError(); @@ -434,12 +368,12 @@ Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { // must match the GUID specified in the TypeServer2 record. if (expectedInfo->getGuid() != typeServerDependency.getGuid()) return createFileError( - typeServerDependency.getName(), + tsPath, make_error(pdb::pdb_error_code::signature_out_of_date)); // Reuse the type index map of the type server. - tpiMap = (*tsSrc)->tpiMap; - ipiMap = (*tsSrc)->ipiMap; + tpiMap = tsSrc->tpiMap; + ipiMap = tsSrc->ipiMap; return Error::success(); } @@ -465,28 +399,26 @@ static PrecompSource *findObjByName(StringRef fileNameOnly) { return nullptr; } -static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) { +static Expected findPrecompMap(ObjFile *file, + PrecompRecord &pr) { // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, // the paths embedded in the OBJs are in the Windows format. SmallString<128> prFileName = sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows); + PrecompSource *precomp; auto it = PrecompSource::mappings.find(pr.getSignature()); if (it != PrecompSource::mappings.end()) { - return it->second; + precomp = it->second; + } else { + // Lookup by name + precomp = findObjByName(prFileName); } - // Lookup by name - return findObjByName(prFileName); -} - -static Expected findPrecompMap(ObjFile *file, - PrecompRecord &pr) { - PrecompSource *precomp = findPrecompSource(file, pr); if (!precomp) return createFileError( - pr.getPrecompFilePath(), + prFileName, make_error(pdb::pdb_error_code::no_matching_pch)); if (pr.getSignature() != file->pchSignature) @@ -505,8 +437,11 @@ static Expected findPrecompMap(ObjFile *file, /// Merges a precompiled headers TPI map into the current TPI map. The /// precompiled headers object will also be loaded and remapped in the /// process. -Error UsePrecompSource::mergeInPrecompHeaderObj() { - auto e = findPrecompMap(file, precompDependency); +static Error +mergeInPrecompHeaderObj(ObjFile *file, + SmallVectorImpl &indexMapStorage, + PrecompRecord &precomp) { + auto e = findPrecompMap(file, precomp); if (!e) return e.takeError(); @@ -514,17 +449,11 @@ Error UsePrecompSource::mergeInPrecompHeaderObj() { if (precompSrc->tpiMap.empty()) return Error::success(); - assert(precompDependency.getStartTypeIndex() == - TypeIndex::FirstNonSimpleIndex); - assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size()); + assert(precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex); + assert(precomp.getTypesCount() <= precompSrc->tpiMap.size()); // Use the previously remapped index map from the precompiled headers. indexMapStorage.append(precompSrc->tpiMap.begin(), - precompSrc->tpiMap.begin() + - precompDependency.getTypesCount()); - - if (config->debugGHashes) - funcIdToType = precompSrc->funcIdToType; // FIXME: Save copy - + precompSrc->tpiMap.begin() + precomp.getTypesCount()); return Error::success(); } @@ -533,7 +462,8 @@ Error UsePrecompSource::mergeDebugT(TypeMerger *m) { // precompiled headers object (/Yc) first. Some type indices in the current // object are referencing data in the precompiled headers object, so we need // both to be loaded. - if (Error e = mergeInPrecompHeaderObj()) + if (Error e = + mergeInPrecompHeaderObj(file, indexMapStorage, precompDependency)) return e; return TpiSource::mergeDebugT(m); @@ -548,586 +478,7 @@ uint32_t TpiSource::countPrecompObjs() { } void TpiSource::clear() { - // Clean up any owned ghash allocations. - clearGHashes(); - TpiSource::instances.clear(); + gc.clear(); TypeServerSource::mappings.clear(); PrecompSource::mappings.clear(); } - -//===----------------------------------------------------------------------===// -// Parellel GHash type merging implementation. -//===----------------------------------------------------------------------===// - -void TpiSource::loadGHashes() { - if (Optional> debugH = getDebugH(file)) { - ghashes = getHashesFromDebugH(*debugH); - ownedGHashes = false; - } else { - CVTypeArray types; - BinaryStreamReader reader(file->debugTypes, support::little); - cantFail(reader.readArray(types, reader.getLength())); - assignGHashesFromVector(GloballyHashedType::hashTypes(types)); - } - - fillIsItemIndexFromDebugT(); -} - -// Copies ghashes from a vector into an array. These are long lived, so it's -// worth the time to copy these into an appropriately sized vector to reduce -// memory usage. -void TpiSource::assignGHashesFromVector( - std::vector &&hashVec) { - GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()]; - memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType)); - ghashes = makeArrayRef(hashes, hashVec.size()); - ownedGHashes = true; -} - -// Faster way to iterate type records. forEachTypeChecked is faster than -// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. -static void forEachTypeChecked(ArrayRef types, - function_ref fn) { - checkError( - forEachCodeViewRecord(types, [fn](const CVType &ty) -> Error { - fn(ty); - return Error::success(); - })); -} - -// Walk over file->debugTypes and fill in the isItemIndex bit vector. -// TODO: Store this information in .debug$H so that we don't have to recompute -// it. This is the main bottleneck slowing down parallel ghashing with one -// thread over single-threaded ghashing. -void TpiSource::fillIsItemIndexFromDebugT() { - uint32_t index = 0; - isItemIndex.resize(ghashes.size()); - forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { - if (isIdRecord(ty.kind())) - isItemIndex.set(index); - ++index; - }); -} - -void TpiSource::mergeTypeRecord(CVType ty) { - // Decide if the merged type goes into TPI or IPI. - bool isItem = isIdRecord(ty.kind()); - MergedInfo &merged = isItem ? mergedIpi : mergedTpi; - - // Copy the type into our mutable buffer. - assert(ty.length() <= codeview::MaxRecordLength); - size_t offset = merged.recs.size(); - size_t newSize = alignTo(ty.length(), 4); - merged.recs.resize(offset + newSize); - auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize); - memcpy(newRec.data(), ty.data().data(), newSize); - - // Fix up the record prefix and padding bytes if it required resizing. - if (newSize != ty.length()) { - reinterpret_cast(newRec.data())->RecordLen = newSize - 2; - for (size_t i = ty.length(); i < newSize; ++i) - newRec[i] = LF_PAD0 + (newSize - i); - } - - // Remap the type indices in the new record. - remapTypesInTypeRecord(newRec); - uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); - merged.recSizes.push_back(static_cast(newSize)); - merged.recHashes.push_back(pdbHash); -} - -void TpiSource::mergeUniqueTypeRecords(ArrayRef typeRecords, - TypeIndex beginIndex) { - // Re-sort the list of unique types by index. - if (kind == PDB) - assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end())); - else - llvm::sort(uniqueTypes); - - // Accumulate all the unique types into one buffer in mergedTypes. - uint32_t ghashIndex = 0; - auto nextUniqueIndex = uniqueTypes.begin(); - assert(mergedTpi.recs.empty()); - assert(mergedIpi.recs.empty()); - forEachTypeChecked(typeRecords, [&](const CVType &ty) { - if (nextUniqueIndex != uniqueTypes.end() && - *nextUniqueIndex == ghashIndex) { - mergeTypeRecord(ty); - ++nextUniqueIndex; - } - if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { - bool success = ty.length() >= 12; - TypeIndex srcFuncIdIndex = beginIndex + ghashIndex; - TypeIndex funcId = srcFuncIdIndex; - TypeIndex funcType; - if (success) { - funcType = *reinterpret_cast(&ty.data()[8]); - success &= remapTypeIndex(funcId, TiRefKind::IndexRef); - success &= remapTypeIndex(funcType, TiRefKind::TypeRef); - } - if (success) { - funcIdToType.insert({funcId, funcType}); - } else { - StringRef fname = file ? file->getName() : ""; - warn("corrupt LF_[M]FUNC_ID record 0x" + - utohexstr(srcFuncIdIndex.getIndex()) + " in " + fname); - } - } - ++ghashIndex; - }); - assert(nextUniqueIndex == uniqueTypes.end() && - "failed to merge all desired records"); - assert(uniqueTypes.size() == - mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && - "missing desired record"); -} - -void TpiSource::remapTpiWithGHashes(GHashState *g) { - assert(config->debugGHashes && "ghashes must be enabled"); - fillMapFromGHashes(g, indexMapStorage); - tpiMap = indexMapStorage; - ipiMap = indexMapStorage; - mergeUniqueTypeRecords(file->debugTypes); - // TODO: Free all unneeded ghash resources now that we have a full index map. -} - -// PDBs do not actually store global hashes, so when merging a type server -// PDB we have to synthesize global hashes. To do this, we first synthesize -// global hashes for the TPI stream, since it is independent, then we -// synthesize hashes for the IPI stream, using the hashes for the TPI stream -// as inputs. -void TypeServerSource::loadGHashes() { - // Don't hash twice. - if (!ghashes.empty()) - return; - pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); - - // Hash TPI stream. - Expected expectedTpi = pdbFile.getPDBTpiStream(); - if (auto e = expectedTpi.takeError()) - fatal("Type server does not have TPI stream: " + toString(std::move(e))); - assignGHashesFromVector( - GloballyHashedType::hashTypes(expectedTpi->typeArray())); - isItemIndex.resize(ghashes.size()); - - // Hash IPI stream, which depends on TPI ghashes. - if (!pdbFile.hasPDBIpiStream()) - return; - Expected expectedIpi = pdbFile.getPDBIpiStream(); - if (auto e = expectedIpi.takeError()) - fatal("error retreiving IPI stream: " + toString(std::move(e))); - ipiSrc->assignGHashesFromVector( - GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes)); - - // The IPI stream isItemIndex bitvector should be all ones. - ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size()); - ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size()); -} - -// Flatten discontiguous PDB type arrays to bytes so that we can use -// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from -// type servers is faster than iterating all object files compiled with /Z7 with -// CVTypeArray, which has high overheads due to the virtual interface of -// BinaryStream::readBytes. -static ArrayRef typeArrayToBytes(const CVTypeArray &types) { - BinaryStreamRef stream = types.getUnderlyingStream(); - ArrayRef debugTypes; - checkError(stream.readBytes(0, stream.getLength(), debugTypes)); - return debugTypes; -} - -// Merge types from a type server PDB. -void TypeServerSource::remapTpiWithGHashes(GHashState *g) { - assert(config->debugGHashes && "ghashes must be enabled"); - - // IPI merging depends on TPI, so do TPI first, then do IPI. No need to - // propagate errors, those should've been handled during ghash loading. - pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); - pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream()); - fillMapFromGHashes(g, indexMapStorage); - tpiMap = indexMapStorage; - mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray())); - if (pdbFile.hasPDBIpiStream()) { - pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream()); - ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size()); - ipiSrc->fillMapFromGHashes(g, ipiSrc->indexMapStorage); - ipiMap = ipiSrc->indexMapStorage; - ipiSrc->tpiMap = tpiMap; - ipiSrc->ipiMap = ipiMap; - ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray())); - funcIdToType = ipiSrc->funcIdToType; // FIXME: Save copy - } -} - -void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { - // No remapping to do with /Zi objects. Simply use the index map from the type - // server. Errors should have been reported earlier. Symbols from this object - // will be ignored. - Expected maybeTsSrc = getTypeServerSource(); - if (!maybeTsSrc) { - typeMergingError = maybeTsSrc.takeError(); - return; - } - TypeServerSource *tsSrc = *maybeTsSrc; - tpiMap = tsSrc->tpiMap; - ipiMap = tsSrc->ipiMap; - funcIdToType = tsSrc->funcIdToType; // FIXME: Save copy -} - -void PrecompSource::loadGHashes() { - if (getDebugH(file)) { - warn("ignoring .debug$H section; pch with ghash is not implemented"); - } - - uint32_t ghashIdx = 0; - std::vector hashVec; - forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { - // Remember the index of the LF_ENDPRECOMP record so it can be excluded from - // the PDB. There must be an entry in the list of ghashes so that the type - // indexes of the following records in the /Yc PCH object line up. - if (ty.kind() == LF_ENDPRECOMP) - endPrecompGHashIdx = ghashIdx; - - hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); - isItemIndex.push_back(isIdRecord(ty.kind())); - ++ghashIdx; - }); - assignGHashesFromVector(std::move(hashVec)); -} - -void UsePrecompSource::loadGHashes() { - PrecompSource *pchSrc = findPrecompSource(file, precompDependency); - if (!pchSrc) - return; - - // To compute ghashes of a /Yu object file, we need to build on the the - // ghashes of the /Yc PCH object. After we are done hashing, discard the - // ghashes from the PCH source so we don't unnecessarily try to deduplicate - // them. - std::vector hashVec = - pchSrc->ghashes.take_front(precompDependency.getTypesCount()); - forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { - hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); - isItemIndex.push_back(isIdRecord(ty.kind())); - }); - hashVec.erase(hashVec.begin(), - hashVec.begin() + precompDependency.getTypesCount()); - assignGHashesFromVector(std::move(hashVec)); -} - -void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { - // This object was compiled with /Yu, so process the corresponding - // precompiled headers object (/Yc) first. Some type indices in the current - // object are referencing data in the precompiled headers object, so we need - // both to be loaded. - if (Error e = mergeInPrecompHeaderObj()) { - typeMergingError = std::move(e); - return; - } - - fillMapFromGHashes(g, indexMapStorage); - tpiMap = indexMapStorage; - ipiMap = indexMapStorage; - mergeUniqueTypeRecords(file->debugTypes, - TypeIndex(precompDependency.getStartTypeIndex() + - precompDependency.getTypesCount())); -} - -namespace { -/// A concurrent hash table for global type hashing. It is based on this paper: -/// Concurrent Hash Tables: Fast and General(?)! -/// https://dl.acm.org/doi/10.1145/3309206 -/// -/// This hash table is meant to be used in two phases: -/// 1. concurrent insertions -/// 2. concurrent reads -/// It does not support lookup, deletion, or rehashing. It uses linear probing. -/// -/// The paper describes storing a key-value pair in two machine words. -/// Generally, the values stored in this map are type indices, and we can use -/// those values to recover the ghash key from a side table. This allows us to -/// shrink the table entries further at the cost of some loads, and sidesteps -/// the need for a 128 bit atomic compare-and-swap operation. -/// -/// During insertion, a priority function is used to decide which insertion -/// should be preferred. This ensures that the output is deterministic. For -/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred. -/// -class GHashCell; -struct GHashTable { - GHashCell *table = nullptr; - uint32_t tableSize = 0; - - GHashTable() = default; - ~GHashTable(); - - /// Initialize the table with the given size. Because the table cannot be - /// resized, the initial size of the table must be large enough to contain all - /// inputs, or insertion may not be able to find an empty cell. - void init(uint32_t newTableSize); - - /// Insert the cell with the given ghash into the table. Return the insertion - /// position in the table. It is safe for the caller to store the insertion - /// position because the table cannot be resized. - uint32_t insert(GloballyHashedType ghash, GHashCell newCell); -}; - -/// A ghash table cell for deduplicating types from TpiSources. -class GHashCell { - uint64_t data = 0; - -public: - GHashCell() = default; - - // Construct data most to least significant so that sorting works well: - // - isItem - // - tpiSrcIdx - // - ghashIdx - // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a - // non-zero representation. - GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx) - : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) | - ghashIdx) { - assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure"); - assert(ghashIdx == getGHashIdx() && "round trip failure"); - } - - explicit GHashCell(uint64_t data) : data(data) {} - - // The empty cell is all zeros. - bool isEmpty() const { return data == 0ULL; } - - /// Extract the tpiSrcIdx. - uint32_t getTpiSrcIdx() const { - return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1; - } - - /// Extract the index into the ghash array of the TpiSource. - uint32_t getGHashIdx() const { return (uint32_t)data; } - - bool isItem() const { return data & (1ULL << 63U); } - - /// Get the ghash key for this cell. - GloballyHashedType getGHash() const { - return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()]; - } - - /// The priority function for the cell. The data is stored such that lower - /// tpiSrcIdx and ghashIdx values are preferred, which means that type record - /// from earlier sources are more likely to prevail. - friend inline bool operator<(const GHashCell &l, const GHashCell &r) { - return l.data < r.data; - } -}; -} // namespace - -namespace lld { -namespace coff { -/// This type is just a wrapper around GHashTable with external linkage so it -/// can be used from a header. -struct GHashState { - GHashTable table; -}; -} // namespace coff -} // namespace lld - -GHashTable::~GHashTable() { delete[] table; } - -void GHashTable::init(uint32_t newTableSize) { - table = new GHashCell[newTableSize]; - memset(table, 0, newTableSize * sizeof(GHashCell)); - tableSize = newTableSize; -} - -uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) { - assert(!newCell.isEmpty() && "cannot insert empty cell value"); - - // FIXME: The low bytes of SHA1 have low entropy for short records, which - // type records are. Swap the byte order for better entropy. A better ghash - // won't need this. - uint32_t startIdx = - ByteSwap_64(*reinterpret_cast(&ghash)) % tableSize; - - // Do a linear probe starting at startIdx. - uint32_t idx = startIdx; - while (true) { - // Run a compare and swap loop. There are four cases: - // - cell is empty: CAS into place and return - // - cell has matching key, earlier priority: do nothing, return - // - cell has matching key, later priority: CAS into place and return - // - cell has non-matching key: hash collision, probe next cell - auto *cellPtr = reinterpret_cast *>(&table[idx]); - GHashCell oldCell(cellPtr->load()); - while (oldCell.isEmpty() || oldCell.getGHash() == ghash) { - // Check if there is an existing ghash entry with a higher priority - // (earlier ordering). If so, this is a duplicate, we are done. - if (!oldCell.isEmpty() && oldCell < newCell) - return idx; - // Either the cell is empty, or our value is higher priority. Try to - // compare and swap. If it succeeds, we are done. - if (cellPtr->compare_exchange_weak(oldCell, newCell)) - return idx; - // If the CAS failed, check this cell again. - } - - // Advance the probe. Wrap around to the beginning if we run off the end. - ++idx; - idx = idx == tableSize ? 0 : idx; - if (idx == startIdx) { - // If this becomes an issue, we could mark failure and rehash from the - // beginning with a bigger table. There is no difference between rehashing - // internally and starting over. - report_fatal_error("ghash table is full"); - } - } - llvm_unreachable("left infloop"); -} - -TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc) - : typeTable(alloc), idTable(alloc) {} - -TypeMerger::~TypeMerger() = default; - -void TypeMerger::mergeTypesWithGHash() { - // Load ghashes. Do type servers and PCH objects first. - { - ScopedTimer t1(loadGHashTimer); - parallelForEach(TpiSource::dependencySources, - [&](TpiSource *source) { source->loadGHashes(); }); - parallelForEach(TpiSource::objectSources, - [&](TpiSource *source) { source->loadGHashes(); }); - } - - ScopedTimer t2(mergeGHashTimer); - GHashState ghashState; - - // Estimate the size of hash table needed to deduplicate ghashes. This *must* - // be larger than the number of unique types, or hash table insertion may not - // be able to find a vacant slot. Summing the input types guarantees this, but - // it is a gross overestimate. The table size could be reduced to save memory, - // but it would require implementing rehashing, and this table is generally - // small compared to total memory usage, at eight bytes per input type record, - // and most input type records are larger than eight bytes. - size_t tableSize = 0; - for (TpiSource *source : TpiSource::instances) - tableSize += source->ghashes.size(); - - // Cap the table size so that we can use 32-bit cell indices. Type indices are - // also 32-bit, so this is an inherent PDB file format limit anyway. - tableSize = std::min(size_t(INT32_MAX), tableSize); - ghashState.table.init(static_cast(tableSize)); - - // Insert ghashes in parallel. During concurrent insertion, we cannot observe - // the contents of the hash table cell, but we can remember the insertion - // position. Because the table does not rehash, the position will not change - // under insertion. After insertion is done, the value of the cell can be read - // to retreive the final PDB type index. - parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) { - TpiSource *source = TpiSource::instances[tpiSrcIdx]; - source->indexMapStorage.resize(source->ghashes.size()); - for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) { - if (source->shouldOmitFromPdb(i)) { - source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated); - continue; - } - GloballyHashedType ghash = source->ghashes[i]; - bool isItem = source->isItemIndex.test(i); - uint32_t cellIdx = - ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i)); - - // Store the ghash cell index as a type index in indexMapStorage. Later - // we will replace it with the PDB type index. - source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx); - } - }); - - // Collect all non-empty cells and sort them. This will implicitly assign - // destination type indices, and partition the entries into type records and - // item records. It arranges types in this order: - // - type records - // - source 0, type 0... - // - source 1, type 1... - // - item records - // - source 0, type 1... - // - source 1, type 0... - std::vector entries; - for (const GHashCell &cell : - makeArrayRef(ghashState.table.table, tableSize)) { - if (!cell.isEmpty()) - entries.push_back(cell); - } - parallelSort(entries, std::less()); - log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n", - double(entries.size()) / tableSize, entries.size(), tableSize)); - - // Find out how many type and item indices there are. - auto mid = - std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0)); - assert((mid == entries.end() || mid->isItem()) && - (mid == entries.begin() || !std::prev(mid)->isItem()) && - "midpoint is not midpoint"); - uint32_t numTypes = std::distance(entries.begin(), mid); - uint32_t numItems = std::distance(mid, entries.end()); - log("Tpi record count: " + Twine(numTypes)); - log("Ipi record count: " + Twine(numItems)); - - // Make a list of the "unique" type records to merge for each tpi source. Type - // merging will skip indices not on this list. Store the destination PDB type - // index for these unique types in the tpiMap for each source. The entries for - // non-unique types will be filled in prior to type merging. - for (uint32_t i = 0, e = entries.size(); i < e; ++i) { - auto &cell = entries[i]; - uint32_t tpiSrcIdx = cell.getTpiSrcIdx(); - TpiSource *source = TpiSource::instances[tpiSrcIdx]; - source->uniqueTypes.push_back(cell.getGHashIdx()); - - // Update the ghash table to store the destination PDB type index in the - // table. - uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes; - uint32_t ghashCellIndex = - source->indexMapStorage[cell.getGHashIdx()].toArrayIndex(); - ghashState.table.table[ghashCellIndex] = - GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex); - } - - // In parallel, remap all types. - for_each(TpiSource::dependencySources, [&](TpiSource *source) { - source->remapTpiWithGHashes(&ghashState); - }); - parallelForEach(TpiSource::objectSources, [&](TpiSource *source) { - source->remapTpiWithGHashes(&ghashState); - }); - - TpiSource::clearGHashes(); -} - -/// Given the index into the ghash table for a particular type, return the type -/// index for that type in the output PDB. -static TypeIndex loadPdbTypeIndexFromCell(GHashState *g, - uint32_t ghashCellIdx) { - GHashCell cell = g->table.table[ghashCellIdx]; - return TypeIndex::fromArrayIndex(cell.getGHashIdx()); -} - -// Fill in a TPI or IPI index map using ghashes. For each source type, use its -// ghash to lookup its final type index in the PDB, and store that in the map. -void TpiSource::fillMapFromGHashes(GHashState *g, - SmallVectorImpl &mapToFill) { - for (size_t i = 0, e = ghashes.size(); i < e; ++i) { - TypeIndex fakeCellIndex = indexMapStorage[i]; - if (fakeCellIndex.isSimple()) - mapToFill[i] = fakeCellIndex; - else - mapToFill[i] = loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex()); - } -} - -void TpiSource::clearGHashes() { - for (TpiSource *src : TpiSource::instances) { - if (src->ownedGHashes) - delete[] src->ghashes.data(); - src->ghashes = {}; - src->isItemIndex.clear(); - src->uniqueTypes.clear(); - } -} diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h index 17368244e5898..f97c0f7617445 100644 --- a/lld/COFF/DebugTypes.h +++ b/lld/COFF/DebugTypes.h @@ -10,37 +10,32 @@ #define LLD_COFF_DEBUGTYPES_H #include "lld/Common/LLVM.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { namespace codeview { -struct GloballyHashedType; +class PrecompRecord; +class TypeServer2Record; } // namespace codeview namespace pdb { class NativeSession; -class TpiStream; } } // namespace llvm namespace lld { namespace coff { -using llvm::codeview::GloballyHashedType; using llvm::codeview::TypeIndex; class ObjFile; class PDBInputFile; class TypeMerger; -struct GHashState; class TpiSource { public: - enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; + enum TpiKind { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; TpiSource(TpiKind k, ObjFile *f); virtual ~TpiSource(); @@ -58,97 +53,21 @@ class TpiSource { /// caller-provided ObjectIndexMap. virtual Error mergeDebugT(TypeMerger *m); - /// Load global hashes, either by hashing types directly, or by loading them - /// from LLVM's .debug$H section. - virtual void loadGHashes(); - - /// Use global hashes to merge type information. - virtual void remapTpiWithGHashes(GHashState *g); - - // Remap a type index in place. - bool remapTypeIndex(TypeIndex &ti, llvm::codeview::TiRefKind refKind) const; - -protected: - void remapRecord(MutableArrayRef rec, - ArrayRef typeRefs); - - void mergeTypeRecord(llvm::codeview::CVType ty); - - // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex - // of the first record in this source, typically 0x1000. When PCHs are - // involved, it may start higher. - void mergeUniqueTypeRecords( - ArrayRef debugTypes, - TypeIndex beginIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex)); - - // Use the ghash table to construct a map from source type index to - // destination PDB type index. Usable for either TPI or IPI. - void fillMapFromGHashes(GHashState *m, - llvm::SmallVectorImpl &indexMap); - - // Copies ghashes from a vector into an array. These are long lived, so it's - // worth the time to copy these into an appropriately sized vector to reduce - // memory usage. - void assignGHashesFromVector(std::vector &&hashVec); - - // Walk over file->debugTypes and fill in the isItemIndex bit vector. - void fillIsItemIndexFromDebugT(); - -public: - bool remapTypesInSymbolRecord(MutableArrayRef rec); - - void remapTypesInTypeRecord(MutableArrayRef rec); - /// Is this a dependent file that needs to be processed first, before other /// OBJs? virtual bool isDependency() const { return false; } - /// Returns true if this type record should be omitted from the PDB, even if - /// it is unique. This prevents a record from being added to the input ghash - /// table. - bool shouldOmitFromPdb(uint32_t ghashIdx) { - return ghashIdx == endPrecompGHashIdx; - } - - /// All sources of type information in the program. - static std::vector instances; - - /// Dependency type sources, such as type servers or PCH object files. These - /// must be processed before objects that rely on them. Set by - /// TpiSources::sortDependencies. - static ArrayRef dependencySources; - - /// Object file sources. These must be processed after dependencySources. - static ArrayRef objectSources; - - /// Sorts the dependencies and reassigns TpiSource indices. - static void sortDependencies(); + static void forEachSource(llvm::function_ref fn); static uint32_t countTypeServerPDBs(); static uint32_t countPrecompObjs(); - /// Free heap allocated ghashes. - static void clearGHashes(); - /// Clear global data structures for TpiSources. static void clear(); const TpiKind kind; - bool ownedGHashes = true; - uint32_t tpiSrcIdx = 0; - -protected: - /// The ghash index (zero based, not 0x1000-based) of the LF_ENDPRECOMP record - /// in this object, if one exists. This is the all ones value otherwise. It is - /// recorded here so that it can be omitted from the final ghash table. - uint32_t endPrecompGHashIdx = ~0U; - -public: ObjFile *file; - /// An error encountered during type merging, if any. - Error typeMergingError = Error::success(); - // Storage for tpiMap or ipiMap, depending on the kind of source. llvm::SmallVector indexMapStorage; @@ -157,31 +76,6 @@ class TpiSource { // objects. llvm::ArrayRef tpiMap; llvm::ArrayRef ipiMap; - - /// Array of global type hashes, indexed by TypeIndex. May be calculated on - /// demand, or present in input object files. - llvm::ArrayRef ghashes; - - /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function - /// type index here. Both indices are PDB indices, not object type indexes. - llvm::DenseMap funcIdToType; - - /// Indicates if a type record is an item index or a type index. - llvm::BitVector isItemIndex; - - /// A list of all "unique" type indices which must be merged into the final - /// PDB. GHash type deduplication produces this list, and it should be - /// considerably smaller than the input. - std::vector uniqueTypes; - - struct MergedInfo { - std::vector recs; - std::vector recSizes; - std::vector recHashes; - }; - - MergedInfo mergedTpi; - MergedInfo mergedIpi; }; TpiSource *makeTpiSource(ObjFile *file); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 56717de226c29..fb496a1c106f2 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -69,13 +69,13 @@ bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS, lld::stderrOS = &stderrOS; errorHandler().cleanupCallback = []() { - TpiSource::clear(); freeArena(); ObjFile::instances.clear(); PDBInputFile::instances.clear(); ImportFile::instances.clear(); BitcodeFile::instances.clear(); memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances)); + TpiSource::clear(); OutputSection::clear(); }; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 9cf5e42c0b6aa..bfa7bd8148dfd 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -66,8 +66,7 @@ using llvm::object::coff_section; static ExitOnError exitOnErr; static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); -Timer lld::coff::loadGHashTimer("Global Type Hashing", totalPdbLinkTimer); -Timer lld::coff::mergeGHashTimer("GHash Type Merging", totalPdbLinkTimer); + static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer); static Timer typeMergingTimer("Type Merging", addObjectsTimer); static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer); @@ -113,6 +112,8 @@ class PDBLinker { /// externally. void addDebug(TpiSource *source); + bool mergeTypeRecords(TpiSource *source); + void addDebugSymbols(TpiSource *source); void mergeSymbolRecords(TpiSource *source, @@ -249,18 +250,43 @@ static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder, }); } -static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) { - // Start the TPI or IPI stream header. - builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80); - builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80); - for_each(TpiSource::instances, [&](TpiSource *source) { - builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs, - source->mergedTpi.recSizes, - source->mergedTpi.recHashes); - builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs, - source->mergedIpi.recSizes, - source->mergedIpi.recHashes); - }); +static bool remapTypeIndex(TypeIndex &ti, ArrayRef typeIndexMap) { + if (ti.isSimple()) + return true; + if (ti.toArrayIndex() >= typeIndexMap.size()) + return false; + ti = typeIndexMap[ti.toArrayIndex()]; + return true; +} + +static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind, + MutableArrayRef recordBytes, + TpiSource *source, + ArrayRef typeRefs) { + MutableArrayRef contents = + recordBytes.drop_front(sizeof(RecordPrefix)); + for (const TiReference &ref : typeRefs) { + unsigned byteSize = ref.Count * sizeof(TypeIndex); + if (contents.size() < ref.Offset + byteSize) + fatal("symbol record too short"); + + // This can be an item index or a type index. Choose the appropriate map. + bool isItemIndex = ref.Kind == TiRefKind::IndexRef; + ArrayRef typeOrItemMap = + isItemIndex ? source->ipiMap : source->tpiMap; + + MutableArrayRef tIs( + reinterpret_cast(contents.data() + ref.Offset), ref.Count); + for (TypeIndex &ti : tIs) { + if (!remapTypeIndex(ti, typeOrItemMap)) { + log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " + + file->getName() + " with bad " + (isItemIndex ? "item" : "type") + + " index 0x" + utohexstr(ti.getIndex())); + ti = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + } + } } static void @@ -303,7 +329,7 @@ static SymbolKind symbolKind(ArrayRef recordData) { /// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32 static void translateIdSymbols(MutableArrayRef &recordData, - TypeMerger &tMerger, TpiSource *source) { + TypeCollection &idTable) { RecordPrefix *prefix = reinterpret_cast(recordData.data()); SymbolKind kind = symbolKind(recordData); @@ -330,25 +356,13 @@ static void translateIdSymbols(MutableArrayRef &recordData, reinterpret_cast(content.data() + refs[0].Offset); // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in // the IPI stream, whose `FunctionType` member refers to the TPI stream. - // Note that LF_FUNC_ID and LF_MFUNC_ID have the same record layout, and + // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and // in both cases we just need the second type index. if (!ti->isSimple() && !ti->isNoneType()) { - if (config->debugGHashes) { - auto idToType = source->funcIdToType.find(*ti); - if (idToType == source->funcIdToType.end()) { - warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item " - "index {1:X} which is not a LF_[M]FUNC_ID record", - source->file->getName(), ti->getIndex())); - *ti = TypeIndex(SimpleTypeKind::NotTranslated); - } else { - *ti = idToType->second; - } - } else { - CVType funcIdData = tMerger.getIDTable().getType(*ti); - ArrayRef tiBuf = funcIdData.data().slice(8, 4); - assert(tiBuf.size() == 4 && "corrupt LF_[M]FUNC_ID record"); - *ti = *reinterpret_cast(tiBuf.data()); - } + CVType funcIdData = idTable.getType(*ti); + ArrayRef tiBuf = funcIdData.data().slice(8, 4); + assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record"); + *ti = *reinterpret_cast(tiBuf.data()); } kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32 @@ -547,16 +561,22 @@ void PDBLinker::mergeSymbolRecords(TpiSource *source, const_cast(sym.data().data()), sym.length()); } - // Re-map all the type index references. - if (!source->remapTypesInSymbolRecord(recordBytes)) { - log("error remapping types in symbol of kind 0x" + - utohexstr(sym.kind()) + ", ignoring"); + // Discover type index references in the record. Skip it if we don't + // know where they are. + SmallVector typeRefs; + if (!discoverTypeIndicesInSymbol(sym, typeRefs)) { + log("ignoring unknown symbol record with kind 0x" + + utohexstr(sym.kind())); return Error::success(); } + // Re-map all the type index references. + remapTypesInSymbolRecord(file, sym.kind(), recordBytes, source, + typeRefs); + // An object file may have S_xxx_ID symbols, but these get converted to // "real" symbols in a PDB. - translateIdSymbols(recordBytes, tMerger, source); + translateIdSymbols(recordBytes, tMerger.getIDTable()); sym = CVSymbol(recordBytes); // If this record refers to an offset in the object file's string table, @@ -728,15 +748,11 @@ void DebugSHandler::mergeInlineeLines( const DebugSubsectionRecord &inlineeSubsection) { DebugInlineeLinesSubsectionRef inlineeLines; exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData())); - if (!source) { - warn("ignoring inlinee lines section in file that lacks type information"); - return; - } // Remap type indices in inlinee line records in place. for (const InlineeSourceLine &line : inlineeLines) { TypeIndex &inlinee = *const_cast(&line.Header->Inlinee); - if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) { + if (!remapTypeIndex(inlinee, source->ipiMap)) { log("bad inlinee line record in " + file.getName() + " with bad inlinee index 0x" + utohexstr(inlinee.getIndex())); } @@ -811,6 +827,20 @@ static void warnUnusable(InputFile *f, Error e) { warn(msg); } +bool PDBLinker::mergeTypeRecords(TpiSource *source) { + ScopedTimer t(typeMergingTimer); + // Before we can process symbol substreams from .debug$S, we need to process + // type information, file checksums, and the string table. Add type info to + // the PDB first, so that we can get the map from object file type and item + // indices to PDB type and item indices. + if (Error e = source->mergeDebugT(&tMerger)) { + // If the .debug$T sections fail to merge, assume there is no debug info. + warnUnusable(source->file, std::move(e)); + return false; + } + return true; +} + // Allocate memory for a .debug$S / .debug$F section and relocate it. static ArrayRef relocateDebugChunk(SectionChunk &debugChunk) { uint8_t *buffer = bAlloc.Allocate(debugChunk.getSize()); @@ -890,27 +920,9 @@ static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) { } void PDBLinker::addDebug(TpiSource *source) { - // Before we can process symbol substreams from .debug$S, we need to process - // type information, file checksums, and the string table. Add type info to - // the PDB first, so that we can get the map from object file type and item - // indices to PDB type and item indices. If we are using ghashes, types have - // already been merged. - if (!config->debugGHashes) { - ScopedTimer t(typeMergingTimer); - if (Error e = source->mergeDebugT(&tMerger)) { - // If type merging failed, ignore the symbols. - warnUnusable(source->file, std::move(e)); - return; - } - } else { - // If type merging failed, ignore the symbols. - if (source->typeMergingError) { - warnUnusable(source->file, std::move(source->typeMergingError)); - return; - } - } - - addDebugSymbols(source); + // If type merging failed, ignore the symbols. + if (mergeTypeRecords(source)) + addDebugSymbols(source); } static pdb::BulkPublic createPublic(Defined *def) { @@ -943,31 +955,25 @@ void PDBLinker::addObjectsToPDB() { for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(builder, obj); }); - // Reorder dependency type sources to come first. - TpiSource::sortDependencies(); - - // Merge type information from input files using global type hashing. - if (config->debugGHashes) - tMerger.mergeTypesWithGHash(); + // Merge dependencies + TpiSource::forEachSource([&](TpiSource *source) { + if (source->isDependency()) + addDebug(source); + }); - // Merge dependencies and then regular objects. - for_each(TpiSource::dependencySources, - [&](TpiSource *source) { addDebug(source); }); - for_each(TpiSource::objectSources, - [&](TpiSource *source) { addDebug(source); }); + // Merge regular and dependent OBJs + TpiSource::forEachSource([&](TpiSource *source) { + if (!source->isDependency()) + addDebug(source); + }); builder.getStringTableBuilder().setStrings(pdbStrTab); t1.stop(); // Construct TPI and IPI stream contents. ScopedTimer t2(tpiStreamLayoutTimer); - // Collect all the merged types. - if (config->debugGHashes) { - addGHashTypeInfo(builder); - } else { - addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); - addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); - } + addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); + addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); t2.stop(); } @@ -1008,8 +1014,8 @@ void PDBLinker::printStats() { "Input OBJ files (expanded from all cmd-line inputs)"); print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies"); print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies"); - print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records"); - print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records"); + print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(), + "Merged TPI records"); print(pdbStrTab.size(), "Output PDB strings"); print(globalSymbols, "Global symbol records"); print(moduleSymbols, "Module symbol records"); @@ -1061,11 +1067,8 @@ void PDBLinker::printStats() { } }; - if (!config->debugGHashes) { - // FIXME: Reimplement for ghash. - printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); - printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); - } + printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); + printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); message(buffer); } diff --git a/lld/COFF/PDB.h b/lld/COFF/PDB.h index 53506d40baef4..273609ea788c5 100644 --- a/lld/COFF/PDB.h +++ b/lld/COFF/PDB.h @@ -20,8 +20,6 @@ union DebugInfo; } namespace lld { -class Timer; - namespace coff { class OutputSection; class SectionChunk; @@ -34,10 +32,6 @@ void createPDB(SymbolTable *symtab, llvm::Optional> getFileLineCodeView(const SectionChunk *c, uint32_t addr); - -extern Timer loadGHashTimer; -extern Timer mergeGHashTimer; - } // namespace coff } // namespace lld diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h index be877cfda6e6b..d3184a7f18d74 100644 --- a/lld/COFF/TypeMerger.h +++ b/lld/COFF/TypeMerger.h @@ -10,47 +10,45 @@ #define LLD_COFF_TYPEMERGER_H #include "Config.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" -#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/Support/Allocator.h" -#include namespace lld { namespace coff { -using llvm::codeview::GloballyHashedType; -using llvm::codeview::TypeIndex; - -struct GHashState; - class TypeMerger { public: - TypeMerger(llvm::BumpPtrAllocator &alloc); - - ~TypeMerger(); + TypeMerger(llvm::BumpPtrAllocator &alloc) + : typeTable(alloc), idTable(alloc), globalTypeTable(alloc), + globalIDTable(alloc) {} /// Get the type table or the global type table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getTypeTable() { - assert(!config->debugGHashes); + if (config->debugGHashes) + return globalTypeTable; return typeTable; } /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getIDTable() { - assert(!config->debugGHashes); + if (config->debugGHashes) + return globalIDTable; return idTable; } - /// Use global hashes to eliminate duplicate types and identify unique type - /// indices in each TpiSource. - void mergeTypesWithGHash(); - /// Type records that will go into the PDB TPI stream. llvm::codeview::MergingTypeTableBuilder typeTable; /// Item records that will go into the PDB IPI stream. llvm::codeview::MergingTypeTableBuilder idTable; + /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH) + llvm::codeview::GlobalTypeTableBuilder globalTypeTable; + + /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH) + llvm::codeview::GlobalTypeTableBuilder globalIDTable; + // When showSummary is enabled, these are histograms of TPI and IPI records // keyed by type index. SmallVector tpiCounts; diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index 79a5940823bdf..4ffc564e67e2f 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -153,13 +153,6 @@ template T check(Expected e) { return std::move(*e); } -// Don't move from Expected wrappers around references. -template T &check(Expected e) { - if (!e) - fatal(llvm::toString(e.takeError())); - return *e; -} - template T check2(ErrorOr e, llvm::function_ref prefix) { if (auto ec = e.getError()) diff --git a/lld/test/COFF/pdb-global-hashes.test b/lld/test/COFF/pdb-global-hashes.test index 430275b7a8848..13039d42fe26a 100644 --- a/lld/test/COFF/pdb-global-hashes.test +++ b/lld/test/COFF/pdb-global-hashes.test @@ -2,7 +2,7 @@ RUN: yaml2obj %p/Inputs/pdb-hashes-1.yaml -o %t.1.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2.yaml -o %t.2.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2-missing.yaml -o %t.2.missing.obj RUN: lld-link /debug %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.nohash.pdb -RUN: lld-link /debug:ghash -verbose %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb +RUN: lld-link /debug:ghash %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb RUN: lld-link /debug:ghash %t.1.obj %t.2.missing.obj /entry:main /nodefaultlib /PDB:%t.mixed.pdb RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.nohash.pdb | FileCheck %s RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.hash.pdb | FileCheck %s diff --git a/lld/test/COFF/pdb-procid-remapping.test b/lld/test/COFF/pdb-procid-remapping.test index adc93585f2aac..d7ea775be98e7 100644 --- a/lld/test/COFF/pdb-procid-remapping.test +++ b/lld/test/COFF/pdb-procid-remapping.test @@ -1,12 +1,8 @@ -# RUN: yaml2obj < %p/Inputs/pdb1.yaml > %t1.obj -# RUN: yaml2obj < %p/Inputs/pdb2.yaml > %t2.obj - +# RUN: yaml2obj %p/Inputs/pdb1.yaml -o %t1.obj +# RUN: yaml2obj %p/Inputs/pdb2.yaml -o %t2.obj # RUN: lld-link /debug /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ # RUN: %t1.obj %t2.obj -# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s -# RUN: lld-link /debug /debug:ghash /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ -# RUN: %t1.obj %t2.obj # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s CHECK: Symbols diff --git a/lld/test/COFF/pdb-type-server-missing.yaml b/lld/test/COFF/pdb-type-server-missing.yaml index 78ddc0e4adb28..1a8c9a05c3d9c 100644 --- a/lld/test/COFF/pdb-type-server-missing.yaml +++ b/lld/test/COFF/pdb-type-server-missing.yaml @@ -5,7 +5,6 @@ # RUN: yaml2obj %s -o %t1.obj # RUN: yaml2obj %p/Inputs/pdb-type-server-missing-2.yaml -o %t2.obj # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN -# RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug:ghash -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 2>&1 | FileCheck %s -check-prefix=IGNORE -allow-empty # RUN: not lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /WX 2>&1 | FileCheck %s -check-prefix=ERR # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 /WX 2>&1 | FileCheck %s -check-prefix=IGNORE-ERR -allow-empty diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test index b954712d9b6c3..bcba6da28b690 100644 --- a/lld/test/COFF/pdb-type-server-simple.test +++ b/lld/test/COFF/pdb-type-server-simple.test @@ -20,11 +20,7 @@ RUN: rm -rf %t && mkdir -p %t && cd %t RUN: yaml2obj %S/Inputs/pdb-type-server-simple-a.yaml -o a.obj RUN: yaml2obj %S/Inputs/pdb-type-server-simple-b.yaml -o b.obj RUN: llvm-pdbutil yaml2pdb %S/Inputs/pdb-type-server-simple-ts.yaml -pdb ts.pdb -RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib -summary | FileCheck %s -check-prefix SUMMARY -RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s - -Re-run with /DEBUG:GHASH -RUN: lld-link a.obj b.obj -entry:main -debug:ghash -out:t.exe -pdb:t.pdb -nodefaultlib -summary -verbose +RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib /summary | FileCheck %s -check-prefix SUMMARY RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s @@ -105,8 +101,7 @@ SUMMARY-NEXT: ------------------------------------------------------------------ SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 1 PDB type server dependencies SUMMARY-NEXT: 0 Precomp OBJ dependencies -SUMMARY-NEXT: 9 Merged TPI records -SUMMARY-NEXT: 16 Merged IPI records +SUMMARY-NEXT: 25 Merged TPI records SUMMARY-NEXT: 3 Output PDB strings SUMMARY-NEXT: 4 Global symbol records SUMMARY-NEXT: 14 Module symbol records diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test index 161ee88d27f5e..b0692ee8002f7 100644 --- a/lld/test/COFF/precomp-link.test +++ b/lld/test/COFF/precomp-link.test @@ -5,7 +5,6 @@ RUN: lld-link %S/Inputs/precomp.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b. RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE -RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE FIXME: The following RUN line should fail, regardless of whether debug info is enabled or not. Normally this would result in an error due to missing _PchSym_ @@ -53,19 +52,12 @@ CHECK-NOT: LF_PRECOMP CHECK-NOT: LF_ENDPRECOMP -Re-run with ghash. Eventually, perhaps this will be the default. - -RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf /summary | FileCheck %s -check-prefix SUMMARY -RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s - - SUMMARY: Summary SUMMARY-NEXT: -------------------------------------------------------------------------------- SUMMARY-NEXT: 3 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 0 PDB type server dependencies SUMMARY-NEXT: 1 Precomp OBJ dependencies -SUMMARY-NEXT: 874 Merged TPI records -SUMMARY-NEXT: 170 Merged IPI records +SUMMARY-NEXT: 1044 Merged TPI records SUMMARY-NEXT: 5 Output PDB strings SUMMARY-NEXT: 167 Global symbol records SUMMARY-NEXT: 20 Module symbol records diff --git a/lld/test/COFF/s_udt.s b/lld/test/COFF/s_udt.s index 373394334b19c..63e4099709575 100644 --- a/lld/test/COFF/s_udt.s +++ b/lld/test/COFF/s_udt.s @@ -2,8 +2,6 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-windows-msvc < %s > %t.obj # RUN: lld-link /DEBUG:FULL /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe # RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s -# RUN: lld-link /DEBUG:FULL /debug:ghash /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe -# RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s # CHECK: Types (TPI Stream) # CHECK-NEXT: ============================================================ diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h index e6ade770457c2..b0a16cccbff31 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -86,16 +86,6 @@ struct GloballyHashedType { bool empty() const { return *(const uint64_t*)Hash.data() == 0; } - friend inline bool operator==(const GloballyHashedType &L, - const GloballyHashedType &R) { - return L.Hash == R.Hash; - } - - friend inline bool operator!=(const GloballyHashedType &L, - const GloballyHashedType &R) { - return !(L.Hash == R.Hash); - } - /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -216,7 +206,7 @@ template <> struct DenseMapInfo { static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS) { - return LHS == RHS; + return LHS.Hash == RHS.Hash; } }; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index bdc6cf46509bc..b9e2562bfc2b1 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -116,22 +116,13 @@ class TypeIndex { uint32_t toArrayIndex() const { assert(!isSimple()); - return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex; + return getIndex() - FirstNonSimpleIndex; } static TypeIndex fromArrayIndex(uint32_t Index) { return TypeIndex(Index + FirstNonSimpleIndex); } - static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) { - return TypeIndex((Index + FirstNonSimpleIndex) | - (IsItem ? DecoratedItemIdMask : 0)); - } - - TypeIndex removeDecoration() { - return TypeIndex(Index & ~DecoratedItemIdMask); - } - SimpleTypeKind getSimpleKind() const { assert(isSimple()); return static_cast(Index & SimpleKindMask); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index 9ef2ee6a93070..72d98e9c2c4d1 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -54,20 +54,16 @@ class TpiStreamBuilder { void setVersionHeader(PdbRaw_TpiVer Version); void addTypeRecord(ArrayRef Type, Optional Hash); - void addTypeRecords(ArrayRef Types, ArrayRef Sizes, - ArrayRef Hashes); Error finalizeMsfLayout(); - uint32_t getRecordCount() const { return TypeRecordCount; } + uint32_t getRecordCount() const { return TypeRecords.size(); } Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); private: - void updateTypeIndexOffsets(ArrayRef Sizes); - uint32_t calculateHashBufferSize() const; uint32_t calculateIndexOffsetSize() const; Error finalize(); @@ -75,11 +71,10 @@ class TpiStreamBuilder { msf::MSFBuilder &Msf; BumpPtrAllocator &Allocator; - uint32_t TypeRecordCount = 0; size_t TypeRecordBytes = 0; PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; - std::vector> TypeRecBuffers; + std::vector> TypeRecords; std::vector TypeHashes; std::vector TypeIndexOffsets; uint32_t HashStreamIndex = kInvalidStreamIndex; diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp index 1ca899789bef2..47b5498181b7f 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -9,7 +9,6 @@ #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" @@ -78,10 +77,9 @@ Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { uint32_t Size = Indices.size(); Name = "("; for (uint32_t I = 0; I < Size; ++I) { - if (Indices[I] < CurrentTypeIndex) - Name.append(Types.getTypeName(Indices[I])); - else - Name.append(""); + assert(Indices[I] < CurrentTypeIndex); + + Name.append(Types.getTypeName(Indices[I])); if (I + 1 != Size) Name.append(", "); } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index b5e7b03e6917f..51a1f0a544e3c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/Error.h" #include #include -#include using namespace llvm; using namespace llvm::msf; @@ -42,68 +41,39 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { VerHeader = Version; } -void TpiStreamBuilder::updateTypeIndexOffsets(ArrayRef Sizes) { - // If we just crossed an 8KB threshold, add a type index offset. - for (uint16_t Size : Sizes) { - size_t NewSize = TypeRecordBytes + Size; - constexpr size_t EightKB = 8 * 1024; - if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecordCount == 0) { - TypeIndexOffsets.push_back( - {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + - TypeRecordCount), - ulittle32_t(TypeRecordBytes)}); - } - ++TypeRecordCount; - TypeRecordBytes = NewSize; - } -} - void TpiStreamBuilder::addTypeRecord(ArrayRef Record, Optional Hash) { + // If we just crossed an 8KB threshold, add a type index offset. assert(((Record.size() & 3) == 0) && "The type record's size is not a multiple of 4 bytes which will " "cause misalignment in the output TPI stream!"); - assert(Record.size() <= codeview::MaxRecordLength); - uint16_t OneSize = (uint16_t)Record.size(); - updateTypeIndexOffsets(makeArrayRef(&OneSize, 1)); + size_t NewSize = TypeRecordBytes + Record.size(); + constexpr size_t EightKB = 8 * 1024; + if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { + TypeIndexOffsets.push_back( + {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + + TypeRecords.size()), + ulittle32_t(TypeRecordBytes)}); + } + TypeRecordBytes = NewSize; - TypeRecBuffers.push_back(Record); - // FIXME: Require it. + TypeRecords.push_back(Record); if (Hash) TypeHashes.push_back(*Hash); } -void TpiStreamBuilder::addTypeRecords(ArrayRef Types, - ArrayRef Sizes, - ArrayRef Hashes) { - // Ignore empty type buffers. There should be no hashes or sizes in this case. - if (Types.empty()) { - assert(Sizes.empty() && Hashes.empty()); - return; - } - - assert(((Types.size() & 3) == 0) && - "The type record's size is not a multiple of 4 bytes which will " - "cause misalignment in the output TPI stream!"); - assert(Sizes.size() == Hashes.size() && "sizes and hashes should be in sync"); - assert(std::accumulate(Sizes.begin(), Sizes.end(), 0U) == Types.size() && - "sizes of type records should sum to the size of the types"); - updateTypeIndexOffsets(Sizes); - - TypeRecBuffers.push_back(Types); - TypeHashes.insert(TypeHashes.end(), Hashes.begin(), Hashes.end()); -} - Error TpiStreamBuilder::finalize() { if (Header) return Error::success(); TpiStreamHeader *H = Allocator.Allocate(); + uint32_t Count = TypeRecords.size(); + H->Version = VerHeader; H->HeaderSize = sizeof(TpiStreamHeader); H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; - H->TypeIndexEnd = H->TypeIndexBegin + TypeRecordCount; + H->TypeIndexEnd = H->TypeIndexBegin + Count; H->TypeRecordBytes = TypeRecordBytes; H->HashStreamIndex = HashStreamIndex; @@ -134,7 +104,7 @@ uint32_t TpiStreamBuilder::calculateSerializedLength() { } uint32_t TpiStreamBuilder::calculateHashBufferSize() const { - assert((TypeRecordCount == TypeHashes.size() || TypeHashes.empty()) && + assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) && "either all or no type records should have hashes"); return TypeHashes.size() * sizeof(ulittle32_t); } @@ -185,7 +155,7 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecBuffers) { + for (auto Rec : TypeRecords) { assert(!Rec.empty() && "Attempting to write an empty type record shifts " "all offsets in the TPI stream!"); assert(((Rec.size() & 3) == 0) && From c694588fc52a8845174fee06ad0bcfa338e87816 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Wed, 30 Sep 2020 14:55:59 -0700 Subject: [PATCH 210/544] [mlir][Linalg] Add pattern to tile and fuse Linalg operations on buffers. The pattern is structured similar to other patterns like LinalgTilingPattern. The fusion patterns takes options that allows you to fuse with producers of multiple operands at once. - The pattern fuses only at the level that is known to be legal, i.e if a reduction loop in the consumer is tiled, then fusion should happen "before" this loop. Some refactoring of the fusion code is needed to fuse only where it is legal. - Since the fusion on buffers uses the LinalgDependenceGraph that is not mutable in place the fusion pattern keeps the original operations in the IR, but are tagged with a marker that can be later used to find the original operations. This change also fixes an issue with tiling and distribution/interchange where if the tile size of a loop were 0 it wasnt account for in these. Differential Revision: https://reviews.llvm.org/D88435 --- .../Linalg/IR/LinalgStructuredOpsInterface.td | 18 + .../Dialect/Linalg/Transforms/Transforms.h | 131 ++++++ .../include/mlir/Dialect/Linalg/Utils/Utils.h | 1 + mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 435 +++++++++++++++--- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 105 +++-- .../Dialect/Linalg/Transforms/Transforms.cpp | 37 ++ mlir/test/Dialect/Linalg/fusion-pattern.mlir | 297 ++++++++++++ mlir/test/lib/Transforms/CMakeLists.txt | 1 + .../Transforms/TestLinalgFusionTransforms.cpp | 112 +++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 10 files changed, 1033 insertions(+), 106 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/fusion-pattern.mlir create mode 100644 mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 23d296c392ff9..f51f7b913027f 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -459,6 +459,24 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { })); }] >, + InterfaceMethod< + /*desc=*/[{ + Return the position of buffer in inputs + outputs list + }], + /*retTy=*/"Optional", + /*methodName=*/"getIndexOfInputAndOutputBuffer", + /*args=*/(ins "Value":$value), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + Optional inputIndex = getIndexOfInput(value); + if (inputIndex.hasValue()) return inputIndex.getValue(); + Optional outputIndex = getIndexOfOutputBuffer(value); + if (outputIndex.hasValue()) { + return $_op.getNumInputs() + outputIndex.getValue(); + } + return llvm::None; + }] + >, //===------------------------------------------------------------------===// // Other interface methods. diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 00a094d720767..a7f8c31e22643 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -18,6 +18,7 @@ namespace mlir { namespace linalg { +struct LinalgFusionOptions; struct LinalgTilingOptions; //===----------------------------------------------------------------------===// @@ -30,6 +31,14 @@ struct TiledLinalgOp { SmallVector loops; }; +struct TiledAndFusedLinalgOps { + LinalgOp op; + SmallVector fusedProducers; + SmallVector originalProducers; + SmallVector fusedLoops; + SmallVector unfusedLoops; +}; + /// Populates patterns for vectorization of all ConvN-D ops. void populateConvVectorizationPatterns( MLIRContext *context, SmallVectorImpl &patterns, @@ -53,6 +62,71 @@ void populateConvVectorizationPatterns( Optional tileLinalgOp(OpBuilder &b, LinalgOp op, const LinalgTilingOptions &options); +/// Tile and fuse the `op` with its producers. The tile and fuse proceeds in +/// three steps +/// - Find tile loops that are fusable with its producer tile loops (a.k.a. tile +/// + fuse loops). +/// - Tile just these loops of the consumer (root operation) and fuse with +/// the producer. +/// - Tile again the tiled consumer operation produced above to do rest of +/// the tiling specified by the `tilingOptions`. +/// +/// For example, consider the sequence of matmul below +/// +/// linalg.matmul ins(%arg0, %arg1 : memref<256x32xf32>, memref<32x32xf32>) +/// outs(%arg2 : memref<256x32xf32>) +/// linalg.matmul ins(%arg2, %arg3 : memref<256x32xf32>, memref<32x32xf32>) +/// outs(%arg4 : memref<256x32xf32>) +/// +/// It is legal to fuse the RAW dependence (through %arg2) by only fusing the +/// matmuls row-wise. For example, the fused computation for the above is shown +/// below. The outer `scf.parallel` loop is the "fused" loop obtained by tiling +/// along the rows of the matrix. The entire rows of the first matmul operation +/// need to be computed before they can be used for the second matmul. The +/// second matmul is further tiled (similar to normal tiling). +/// +/// #map0 = affine_map<(d0, d1)[s0] -> (d0 * 32 + s0 + d1)> +/// #map1 = affine_map<(d0, d1) -> (d0 * 32 + d1)> +/// scf.parallel (%arg5) = (%c0) to (%c256) step (%c16) { +/// %0 = subview %arg2[%arg5, 0] [16, 32] [1, 1] +/// : memref<256x32xf32> to memref<16x32xf32, #map0> +/// %1 = subview %arg4[%arg5, 0] [16, 32] [1, 1] +/// : memref<256x32xf32> to memref<16x32xf32, #map0> +/// %2 = subview %arg0[%arg5, 0] [16, 32] [1, 1] +/// : memref<256x32xf32> to memref<16x32xf32, #map0> +/// %3 = subview %arg1[0, 0] [32, 32] [1, 1] +/// : memref<32x32xf32> to memref<32x32xf32, #map1> +/// linalg.matmul +/// ins(%2, %3 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>) +/// outs(%0 : memref<16x32xf32, #map0>) +/// scf.parallel (%arg6) = (%c0) to (%c32) step (%c8) { +/// scf.for %arg7 = %c0 to %c32 step %c4 { +/// %4 = subview %0[0, %arg7] [16, 4] [1, 1] +/// : memref<16x32xf32, #map0> to memref<16x4xf32, #map0> +/// %5 = subview %arg3[%arg7, %arg6] [4, 8] [1, 1] +/// : memref<32x32xf32> to memref<4x8xf32, #map0> +/// %6 = subview %1[0, %arg6] [16, 8] [1, 1] +/// : memref<16x32xf32, #map0> to memref<16x8xf32, #map0> +/// linalg.matmul +/// ins(%4, %5 : memref<16x4xf32, #map0>, memref<4x8xf32, #map0>) +/// outs(%6 : memref<16x8xf32, #map0>) +/// } +/// scf.yield +/// } +/// scf.yield +/// } +/// +/// The following tiling options are handled differently in tile+fuse (compared +/// to tile only) +/// - Interchange of the tiling loops is not supported right now. +/// - Distribution is only done for the tile+fuse loops. The tiled loops +/// generated by the second tiling is not distributed. +Optional +tileAndFuseLinalgOps(PatternRewriter &rewriter, LinalgOp op, + const LinalgDependenceGraph &dependenceGraph, + const LinalgTilingOptions &tilingOptions, + const LinalgFusionOptions &fusionOptions); + /// Interchanges the `iterator_types` and `iterator_maps` dimensions of `op`. /// This is an in-place transformation controlled by `interchangeVector`. /// An empty vector is interpreted as the identity permutation and the @@ -323,6 +397,63 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern { } }; +struct LinalgFusionOptions { + /// Optional list of operands indices to use for fusion. When unspecified, + /// only one fusion is done, i.e., the pattern returns after the first fusion. + Optional> indicesToFuse = None; + LinalgFusionOptions &setIndicesToFuse(ArrayRef operands) { + indicesToFuse = DenseSet(); + indicesToFuse->insert(operands.begin(), operands.end()); + return *this; + } +}; + +struct LinalgBaseTileAndFusePattern : public RewritePattern { + LinalgBaseTileAndFusePattern(StringRef opName, MLIRContext *context, + const LinalgDependenceGraph &dependenceGraph, + LinalgTilingOptions tilingOptions, + LinalgFusionOptions fusionOptions, + LinalgMarker marker = LinalgMarker(), + LinalgMarker fusedOpMarker = LinalgMarker(), + LinalgMarker originalOpMarker = LinalgMarker(), + PatternBenefit benefit = 1); + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + /// Dependence graph needed for fusion. + const LinalgDependenceGraph &dependenceGraph; + /// Options to control tiling. + LinalgTilingOptions tilingOptions; + /// Options to control fusion. + LinalgFusionOptions fusionOptions; + /// Marker to control application of the pattern. + LinalgMarker marker; + /// Marker set on the fused op after tile and fuse. + LinalgMarker fusedOpMarker; + /// The dependenceGraph is not modifiable, i.e. if the Linalg operations used + /// to build the dependence graph changes then the dependenceGraph needs to be + /// recomputed right now. To not invalidate the dependenceGraph as + /// transformation happens, the original producer can be tagged with a marker + /// that can be later used to delete the original operations. + LinalgMarker originalOpMarker; +}; + +template +struct LinalgTileAndFusePattern : public LinalgBaseTileAndFusePattern { + LinalgTileAndFusePattern(MLIRContext *context, + const LinalgDependenceGraph &dependenceGraph, + LinalgTilingOptions tilingOptions, + LinalgFusionOptions fusionOptions, + LinalgMarker marker = LinalgMarker(), + LinalgMarker fusedOpMarker = LinalgMarker(), + LinalgMarker originalOpMarker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgBaseTileAndFusePattern( + OpTy::getOperationName(), context, dependenceGraph, tilingOptions, + fusionOptions, marker, fusedOpMarker, originalOpMarker, benefit) {} +}; + /// /// Linalg interchange patterns. /// diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index aca5a981b0034..76ce4eb30e7f3 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -10,6 +10,7 @@ #define MLIR_DIALECT_LINALG_UTILS_H_ #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/SCF/SCF.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index dfc977daa2071..8dadfe63e6596 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" @@ -154,9 +155,9 @@ static ViewDimension getViewDefiningLoopRange(LinalgOp op, unsigned loopDepth) { llvm_unreachable("Expect to be able to extract a view defining loop range"); } -static LinalgOp fuse(Value producedView, LinalgOp producer, LinalgOp consumer, - unsigned consumerIdx, unsigned producerIdx, - OperationFolder *folder) { +static LinalgOp fuse(OpBuilder &b, LinalgOp producer, unsigned producerIdx, + LinalgOp consumer, unsigned consumerIdx, + OperationFolder *folder = nullptr) { assert(producer.hasBufferSemantics() && "expected linalg op with buffer semantics"); assert(consumer.hasBufferSemantics() && @@ -174,9 +175,7 @@ static LinalgOp fuse(Value producedView, LinalgOp producer, LinalgOp consumer, // we can always identify a data dimension with a (at least one) loop // dimension. AffineMap producerMap = - producer.indexing_maps()[producer.getNumInputs() + producerIdx] - .cast() - .getValue(); + producer.indexing_maps()[producerIdx].cast().getValue(); LLVM_DEBUG(dbgs() << "Producer Idx: " << producerIdx << ", producer map: " << producerMap << "\n"); @@ -185,10 +184,9 @@ static LinalgOp fuse(Value producedView, LinalgOp producer, LinalgOp consumer, unsigned nWin = producer.getNumWindowLoops(); SmallVector loopRanges(nPar + nRed + nWin); - OpBuilder b(consumer.getOperation()); - auto loc = consumer.getLoc(); // Iterate over dimensions identified by the producer map for `producerIdx`. // This defines a subset of the loop ranges that we need to complete later. + auto loc = consumer.getLoc(); for (auto en : llvm::enumerate(producerMap.getResults())) { unsigned posInProducerLoop = en.value().cast().getPosition(); loopRanges[posInProducerLoop] = @@ -319,71 +317,380 @@ static bool isSameSubView(Value a, Value b) { return true; } -static Optional -fuseProducerOfDep(OpBuilder &b, LinalgOp consumer, unsigned consumerIdx, - const LinalgDependenceGraph &graph, OperationFolder *folder, - LinalgDependenceGraph::DependenceType depType) { - assert(consumer.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - LLVM_DEBUG(dbgs() << "\nStart examining consumer: " - << *consumer.getOperation()); - for (auto dependence : graph.getDependencesInto(consumer, depType)) { - LLVM_DEBUG(dbgs() << "\n***Consider producer:\t" - << *dependence.dependentOpView.op << "\n"); - auto producer = cast(dependence.dependentOpView.op); - - // Check that the dependence is indeed on the input `consumerIdx` view. - auto consumedView = dependence.indexingView; - if (!isSameSubView(consumer.getBuffer(consumerIdx), consumedView)) - continue; - - // Consumer consumes this view, `isStructurallyFusableProducer` also checks - // whether it is a strict subview of the producer view. - auto producedView = dependence.dependentOpView.view; - auto producerIdx = producer.getIndexOfOutputBuffer(producedView).getValue(); - // `consumerIdx` and `producerIdx` exist by construction. - LLVM_DEBUG(dbgs() << "\n" - << LinalgDependenceGraph::getDependenceTypeStr(depType) - << "producer: " << *producer.getOperation() << " view: " - << producedView << " output index: " << producerIdx); - - // Must be a subview or a slice to guarantee there are loops we can fuse - // into. - auto subView = consumedView.getDefiningOp(); - auto slice = consumedView.getDefiningOp(); - if (!subView && !slice) { - LLVM_DEBUG(dbgs() << "\nNot fusable (not a subview or slice)"); - continue; - } +static Optional +findFusableProducer(LinalgOp consumer, unsigned consumerIdx, + const LinalgDependenceGraph &dependenceGraph) { + // Only consider RAW and WAW atm. + for (auto depType : { + LinalgDependenceGraph::DependenceType::RAW, + LinalgDependenceGraph::DependenceType::WAW, + }) { + for (auto dependence : + dependenceGraph.getDependencesInto(consumer, depType)) { + auto producer = cast(dependence.dependentOpView.op); - // Simple fusability checks. - if (!isFusableInto(graph, consumer, consumedView, producer)) - continue; + // Check that the dependence is indeed on the input `consumerIdx` view. + auto consumedView = dependence.indexingView; + if (!isSameSubView(consumer.getBuffer(consumerIdx), consumedView)) + continue; - // Fuse `producer` just before `consumer`. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(consumer.getOperation()); - ScopedContext scope(b, consumer.getLoc()); - LLVM_DEBUG(dbgs() << "Fuse into consumer: " << *consumer << "\n"); - auto fusedProducer = fuse(producedView, producer, consumer, consumerIdx, - producerIdx, folder); + // Consumer consumes this view, `isStructurallyFusableProducer` also + // checks whether it is a strict subview of the producer view. + auto producedView = dependence.dependentOpView.view; + auto producerIdx = + producer.getIndexOfOutputBuffer(producedView).getValue(); + // `consumerIdx` and `producerIdx` exist by construction. + LLVM_DEBUG(dbgs() << "\n" + << LinalgDependenceGraph::getDependenceTypeStr(depType) + << "producer: " << *producer.getOperation() << " view: " + << producedView << " output index: " << producerIdx); + (void)producerIdx; + + // Simple fusability checks. + if (!isFusableInto(dependenceGraph, consumer, consumedView, producer)) + continue; - return FusionInfo{producer, fusedProducer}; + return dependence; + } } - return llvm::None; + return {}; } -// Only consider RAW and WAW atm. Optional mlir::linalg::fuseProducerOf( OpBuilder &b, LinalgOp consumer, unsigned consumerIdx, const LinalgDependenceGraph &graph, OperationFolder *folder) { - for (auto dep : { - LinalgDependenceGraph::DependenceType::RAW, - LinalgDependenceGraph::DependenceType::WAW, - }) { - if (auto res = - fuseProducerOfDep(b, consumer, consumerIdx, graph, folder, dep)) - return res; + Optional fusableDependence = + findFusableProducer(consumer, consumerIdx, graph); + if (!fusableDependence) + return {}; + + LinalgOp producerOp = cast(fusableDependence->dependentOpView.op); + Value producerView = fusableDependence->dependentOpView.view; + Value consumerView = fusableDependence->indexingView; + + // Must be a subview or a slice to guarantee there are loops we can fuse + // into. + auto subView = consumerView.getDefiningOp(); + auto slice = consumerView.getDefiningOp(); + if (!subView && !slice) { + LLVM_DEBUG(dbgs() << "\nNot fusable (not a subview or slice)"); + return {}; + } + + // Fuse `producer` just before `consumer`. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(consumer.getOperation()); + ScopedContext scope(b, consumer.getLoc()); + LLVM_DEBUG(dbgs() << "Fuse into consumer: " << *consumer << "\n"); + Optional producerIdxOpt = + producerOp.getIndexOfInputAndOutputBuffer(producerView); + assert(producerIdxOpt.hasValue() && "incorrect operand index"); + unsigned producerIdx = producerIdxOpt.getValue(); + + auto fusedProducer = + fuse(b, producerOp, producerIdx, consumer, consumerIdx, folder); + return FusionInfo{producerOp, fusedProducer}; +} + +/// Returns the positions of the loop in `op` that can be tiled based on the +/// operations that are to be fused with it. For example, in a +/// +/// linalg. matmul ins(%a, %b : ...) outs(%c : ...) +/// +/// if the producer of %a needs to be fused with this op, only the `i` loop of +/// the matmul can be tiled while fusing. If producer of %a, and %b are to be +/// fused, then no loops can be tiled while fusing. +static DenseSet collectTileAndFuseLoops( + LinalgOp op, ArrayRef + fusableDependences) { + // 1. Only parallel loops can be used for tile + fuse. Find the number of + // common outer parallel loops between the op and its producers being fused. + auto getNumOuterParallelLoops = [](LinalgOp linalgOp) { + return linalgOp.iterator_types() + .getValue() + .take_while([](Attribute attr) -> bool { + return attr.cast().getValue() == + getParallelIteratorTypeName(); + }) + .size(); + }; + + size_t numOuterParallelLoops = getNumOuterParallelLoops(op); + for (auto dependence : fusableDependences) { + numOuterParallelLoops = + std::min(numOuterParallelLoops, getNumOuterParallelLoops(cast( + dependence.dependentOpView.op))); + } + + // Need to compute what tiled loops can be "fused". Given the precondition + // that all indexing map for the producer view is a projected permutation, we + // can assert that the producer iterates over the dimensions of the "fused + // view" only once. To be used a fused loop the producer should use this loop + // to access the fused view. For example, consider + // + // ``` + // linalg.add ins(%a, %b) outs(%c) + // linalg.matmul ins(%d, %c) outs(%e) + // ``` + // + // if `linalg.add` has the semantics of `c = a + b`, then the following + // tile+fuse code is correct. + // + // ``` + // for j ... += TSj + // %sa = subview %a[0, %j][...] + // %sb = subview %b[0, %j][...] + // %sc = subview %c[0, %j][...] + // %sd = subview %d[0, 0][...] + // %se = subview %e[0, %j][...] + // linalg.add ins(%sa, %sb) outs(%sc) + // linalg.matmul ins(%sd, %sc) outs(%se) + // ``` + // + // On the other hand tiling along i would be incorrect + // + // ``` + // for %i .. += TSi + // %sa = subview %a[%i, 0][...] + // %sb = subview %b[%i, 0][...] + // %sc = subview %c[%i, 0][...] + // %sc2 = subview %c[0, 0][...] + // %sd = subview %d[%i, 0][...] + // %se = subview %e[%i, 0][...] + // linalg.add ins(%sa, %sb) outs(%sc) + // linalg.matmul ins(%sd, %sc2) outs(%se) + // ``` + // + // The write to the subview `%sc` in `linalg.add` is performed after the read + // from it using `%sc2` violating the RAW dependence of the original code. To + // find such loops indexing map of the fused view in the consumer op is + // used. For the above example, this indexing map is + // + // affine_map<(d0, d1, d2) -> (d2, d1)> + // + // Since d0 is not in the result expressions of this map, it is not treated as + // tile + fuse loop, (but d1 is). + // + // TODO: The above is probably restrictive and there might be a generalization + // of these that might allow for more fusion opportunities. Explore based on + // needs. + SmallVector, 1> commonTilableLoops; + for (auto dependence : fusableDependences) { + unsigned consumerIdx = + op.getIndexOfInputAndOutputBuffer(dependence.indexingView).getValue(); + AffineMap consumerAccess = op.getIndexingMap(consumerIdx); + // Previously asserted that the consumerAccess map is a projected + // permutation, so all results are known to be AffineDimExprs. To remove + // this restriction walk the expression to find which dimensions of the + // consumer loop appear in the `consumerAccess`. + DenseSet positions; + for (auto expr : consumerAccess.getResults()) + positions.insert(expr.cast().getPosition()); + commonTilableLoops.emplace_back(std::move(positions)); + } + + // 2. Of the outer parallel loops, only those loops can be tiled + fused as + // computed above for all the fused dependences can be used to tile and fuse. + DenseSet tilableParallelLoops; + for (auto index : llvm::seq(0, numOuterParallelLoops)) { + if (llvm::all_of(commonTilableLoops, + [&](const DenseSet &tilableLoops) { + return tilableLoops.count(index); + })) + tilableParallelLoops.insert(index); + } + return tilableParallelLoops; +} + +/// Find all dependences that are to be fusable. +static Optional< + SmallVector> +findAllFusableDependences(LinalgOp op, + const LinalgDependenceGraph &dependenceGraph, + const LinalgFusionOptions &fusionOptions) { + SmallVector + fusableDependences; + for (auto operand : llvm::enumerate(op.getInputsAndOutputBuffers())) { + if (fusionOptions.indicesToFuse && + !fusionOptions.indicesToFuse->count(operand.index())) + continue; + Optional + fusableDependence = + findFusableProducer(op, operand.index(), dependenceGraph); + if (!fusableDependence) + continue; + // Make sure that the indexing map of the view used for fusion in the + // producer is a projected permutation. + LinalgOp producerOp = cast(fusableDependence->dependentOpView.op); + Value producerView = fusableDependence->dependentOpView.view; + unsigned producerIdx = + producerOp.getIndexOfInputAndOutputBuffer(producerView).getValue(); + AffineMap producerMap = producerOp.getIndexingMap(producerIdx); + if (!producerMap.isProjectedPermutation()) { + op.emitError("unhandled non permutation indexing map for fused view in " + "producer for operand at index ") + << operand.index(); + return llvm::None; + } + Value consumerView = fusableDependence->indexingView; + unsigned consumerIdx = + op.getIndexOfInputAndOutputBuffer(consumerView).getValue(); + if (!op.getIndexingMap(consumerIdx).isProjectedPermutation()) { + op.emitError( + "unhandled case where indexing map for fused view in the consumer is " + "not a projected permuration while fusing at index ") + << operand.index(); + return llvm::None; + } + fusableDependences.push_back(*fusableDependence); + if (!fusionOptions.indicesToFuse) + break; + } + return fusableDependences; +} + +static bool isZero(Value v) { + if (auto cst = v.getDefiningOp()) + return cst.getValue() == 0; + return false; +} + +template +static Optional +tileAndFuseLinalgOpsImpl(PatternRewriter &rewriter, LinalgOp op, + const LinalgDependenceGraph &dependenceGraph, + const LinalgTilingOptions &tilingOptions, + const LinalgFusionOptions &fusionOptions) { + assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); + // Some of the tiling options might not be supportable with tile and fuse. + // TODO: Support interchange with tile + fuse. + if (!tilingOptions.interchangeVector.empty()) { + op.emitError("unable to handle tile and fuse with interchange"); + return llvm::None; + } + + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(op); + ScopedContext scope(rewriter, op.getLoc()); + + // Find all the producers. + Optional> + fusableDependencesOpt = + findAllFusableDependences(op, dependenceGraph, fusionOptions); + if (!fusableDependencesOpt) + return llvm::None; + ArrayRef fusableDependences( + *fusableDependencesOpt); + + // Enforce the convention that "tiling by zero" skips tiling a particular + // dimension. This convention is significantly simpler to handle instead of + // adjusting affine maps to account for missing dimensions. + auto nLoops = op.getNumLoops(); + SmallVector tileSizeVector = + tilingOptions.tileSizeComputationFunction(rewriter, op); + if (tileSizeVector.size() < nLoops) { + auto zero = std_constant_index(0); + tileSizeVector.append(nLoops - tileSizeVector.size(), zero); + } + + TiledAndFusedLinalgOps ret; + + // Find the loops that can be tiled and fused. + DenseSet tileFuseLoops = + collectTileAndFuseLoops(op, fusableDependences); + + // If there are no fusable dependences or there are no tile+fusable loops, + // just return. + if (fusableDependences.empty() || tileFuseLoops.empty()) { + return llvm::None; + } + + // Get the tile sizes for the first and second tiling steps. For the first + // step the tile size are set to zero for the loops that arent + // fused. Similarly for the second step, the tile sizes are set to zero for + // the loops that are fused. For example, if for the following input + // + // ``` + // linalg.add ins(%a, %b) outs(%c) + // linalg.matmul ins(%d, %c) outs(%e) + // ``` + // + // if the tile sizes of the `{i, j, k}` loops where given as `{ti, tj, tk}` + // respectively, and since only `j` can be tiled and fused. The tile sizes + // would be `{0, t_j, 0}` for the first tiling that tiles just the fusable + // loops. The second tiling would be use tile sizes of `{t_i, 0, t_k}` to tile + // the tiled matmul generated by the first tiling step. + SmallVector tileAndFuseSizes, tileSizes; + for (auto tileSize : enumerate(tileSizeVector)) { + auto zero = std_constant_index(0); + if (tileFuseLoops.count(tileSize.index())) { + tileAndFuseSizes.push_back(tileSize.value()); + tileSizes.push_back(zero); + } else { + tileSizes.push_back(tileSize.value()); + tileAndFuseSizes.push_back(zero); + } + } + + // Tile for the loops that can be fused. + LinalgTilingOptions firstTilingOptions = tilingOptions; + firstTilingOptions.setTileSizes(tileAndFuseSizes); + Optional firstTiledOp = + tileLinalgOp(rewriter, op, firstTilingOptions); + if (!firstTiledOp) + return llvm::None; + ret.op = firstTiledOp->op; + ret.fusedLoops.assign(firstTiledOp->loops.begin(), firstTiledOp->loops.end()); + + rewriter.setInsertionPoint(ret.op); + // Fuse the operands. + for (auto producer : enumerate(fusableDependences)) { + LinalgOp producerOp = cast(producer.value().dependentOpView.op); + unsigned producerIdx = producerOp + .getIndexOfInputAndOutputBuffer( + producer.value().dependentOpView.view) + .getValue(); + unsigned consumerIdx = + op.getIndexOfInputAndOutputBuffer(producer.value().indexingView) + .getValue(); + LinalgOp fusedOp = + fuse(rewriter, producerOp, producerIdx, ret.op, consumerIdx); + ret.fusedProducers.push_back(fusedOp); + ret.originalProducers.push_back(producerOp); + } + + if (!llvm::all_of(tileSizes, isZero)) { + // Tile the remaining loops of the root operation. + LinalgTilingOptions secondTilingOptions = tilingOptions; + // The distribution is done only for the tile+fused loops. + secondTilingOptions.distribution = llvm::None; + secondTilingOptions.setTileSizes(tileSizes); + Optional secondTiledOp = + tileLinalgOp(rewriter, ret.op, secondTilingOptions); + if (!secondTiledOp) + return llvm::None; + ret.unfusedLoops.assign(secondTiledOp->loops.begin(), + secondTiledOp->loops.end()); + rewriter.eraseOp(ret.op); + ret.op = secondTiledOp->op; + } + + return ret; +} + +Optional +mlir::linalg::tileAndFuseLinalgOps(PatternRewriter &rewriter, LinalgOp op, + const LinalgDependenceGraph &dependenceGraph, + const LinalgTilingOptions &tilingOptions, + const LinalgFusionOptions &fusionOptions) { + switch (tilingOptions.loopType) { + case LinalgTilingLoopType::Loops: + return tileAndFuseLinalgOpsImpl(rewriter, op, dependenceGraph, + tilingOptions, fusionOptions); + case LinalgTilingLoopType::ParallelLoops: + return tileAndFuseLinalgOpsImpl( + rewriter, op, dependenceGraph, tilingOptions, fusionOptions); + default:; } return llvm::None; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 3db801bc2d575..68d69549611cc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -318,25 +318,10 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, } template -Optional static tileLinalgOpImpl( - OpBuilder &b, LinalgOp op, const LinalgTilingOptions &options) { - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(op); - ScopedContext scope(b, op.getLoc()); - - assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); - // 1. Enforce the convention that "tiling by zero" skips tiling a particular - // dimension. This convention is significantly simpler to handle instead of - // adjusting affine maps to account for missing dimensions. +static Optional +tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, + const LinalgTilingOptions &options) { auto nLoops = op.getNumLoops(); - SmallVector tileSizeVector = - options.tileSizeComputationFunction(b, op); - if (tileSizeVector.size() < nLoops) { - auto zero = std_constant_index(0); - tileSizeVector.append(nLoops - tileSizeVector.size(), zero); - } - - ArrayRef tileSizes = tileSizeVector; // Initial tile sizes may be too big, only take the first nLoops. tileSizes = tileSizes.take_front(nLoops); @@ -350,17 +335,7 @@ Optional static tileLinalgOpImpl( return llvm::None; } - // If interchangeVector is empty, use the identity. Build the permutation map - // otherwise. - auto invPermutationMap = - AffineMap::getMultiDimIdentityMap(tileSizes.size(), b.getContext()); - if (!options.interchangeVector.empty()) - invPermutationMap = inversePermutation(AffineMap::getPermutationMap( - options.interchangeVector, b.getContext())); - if (!invPermutationMap) - return llvm::None; - - // 2. Build the tiled loop ranges. + // 1. Build the tiled loop ranges. auto allViewSizes = getViewSizes(b, op); // The flattened loopToOperandRangesMaps is expected to be an invertible // permutation map (asserted in the inverse calculation). @@ -374,17 +349,39 @@ Optional static tileLinalgOpImpl( SmallVector loopRanges; LoopIndexToRangeIndexMap loopIndexToRangeIndex; std::tie(loopRanges, loopIndexToRangeIndex) = makeTiledLoopRanges( - b, scope.getLocation(), viewSizesToLoopsMap, allViewSizes, tileSizes); - if (!options.interchangeVector.empty()) - applyPermutationToVector(loopRanges, options.interchangeVector); + b, op.getLoc(), viewSizesToLoopsMap, allViewSizes, tileSizes); + SmallVector iteratorTypes; + for (auto attr : + enumerate(op.iterator_types().cast().getValue())) { + if (loopIndexToRangeIndex.count(attr.index())) + iteratorTypes.push_back(attr.value()); + } + // If interchangeVector is empty, use the identity. Build the permutation map + // otherwise. + auto invPermutationMap = + AffineMap::getMultiDimIdentityMap(tileSizes.size(), b.getContext()); + if (!options.interchangeVector.empty()) { + // Based on the pruned iterations (due to zero tile size), recompute the + // interchange vector. + SmallVector interchangeVector; + interchangeVector.reserve(options.interchangeVector.size()); + for (auto pos : options.interchangeVector) { + auto it = loopIndexToRangeIndex.find(pos); + if (it == loopIndexToRangeIndex.end()) + continue; + interchangeVector.push_back(it->second); + } + invPermutationMap = inversePermutation( + AffineMap::getPermutationMap(interchangeVector, b.getContext())); + if (!invPermutationMap) + return llvm::None; + applyPermutationToVector(loopRanges, interchangeVector); + applyPermutationToVector(iteratorTypes, interchangeVector); + } - // 3. Create the tiled loops. + // 2. Create the tiled loops. LinalgOp res = op; SmallVector ivs; - SmallVector iteratorTypes = - llvm::to_vector<4>(op.iterator_types().cast().getValue()); - if (!options.interchangeVector.empty()) - applyPermutationToVector(iteratorTypes, options.interchangeVector); GenerateLoopNest::doit( loopRanges, /*iterArgInitValues*/ {}, iteratorTypes, [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector { @@ -410,10 +407,10 @@ Optional static tileLinalgOpImpl( }, options.distribution); - // 4. Transforms index arguments of `linalg.generic` w.r.t. to the tiling. + // 3. Transforms index arguments of `linalg.generic` w.r.t. to the tiling. transformIndexedGenericOpIndices(b, res, ivs, loopIndexToRangeIndex); - // 5. Gather the newly created loops and return them with the new op. + // 4. Gather the newly created loops and return them with the new op. SmallVector loops; loops.reserve(ivs.size()); for (auto iv : ivs) { @@ -429,14 +426,38 @@ Optional static tileLinalgOpImpl( return TiledLinalgOp{res, loops}; } +template +Optional static tileLinalgOpImpl( + OpBuilder &b, LinalgOp op, const LinalgTilingOptions &options) { + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(op); + ScopedContext scope(b, op.getLoc()); + + assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); + // Enforce the convention that "tiling by zero" skips tiling a particular + // dimension. This convention is significantly simpler to handle instead of + // adjusting affine maps to account for missing dimensions. + auto nLoops = op.getNumLoops(); + SmallVector tileSizeVector = + options.tileSizeComputationFunction(b, op); + if (tileSizeVector.size() < nLoops) { + auto zero = std_constant_index(0); + tileSizeVector.append(nLoops - tileSizeVector.size(), zero); + } + + return tileLinalgOpImpl(b, op, tileSizeVector, options); +} + Optional mlir::linalg::tileLinalgOp(OpBuilder &b, LinalgOp op, const LinalgTilingOptions &options) { - if (options.loopType == LinalgTilingLoopType::Loops) + switch (options.loopType) { + case LinalgTilingLoopType::Loops: return tileLinalgOpImpl(b, op, options); - if (options.loopType == LinalgTilingLoopType::ParallelLoops) + case LinalgTilingLoopType::ParallelLoops: return tileLinalgOpImpl(b, op, options); - // TODO: Impl tiling to affine loops when it makes sense. + default:; + } return llvm::None; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index c1aad620fe08a..56652cbcb5277 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -129,6 +129,43 @@ LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewrite( return success(); } +mlir::linalg::LinalgBaseTileAndFusePattern::LinalgBaseTileAndFusePattern( + StringRef opName, MLIRContext *context, + const LinalgDependenceGraph &dependenceGraph, + LinalgTilingOptions tilingOptions, LinalgFusionOptions fusionOptions, + LinalgMarker marker, LinalgMarker fusedOpMarker, + LinalgMarker originalOpMarker, PatternBenefit benefit) + : RewritePattern(opName, {}, benefit, context), + dependenceGraph(dependenceGraph), tilingOptions(tilingOptions), + fusionOptions(fusionOptions), marker(marker), + fusedOpMarker(fusedOpMarker), originalOpMarker(originalOpMarker) {} + +LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( + Operation *op, PatternRewriter &rewriter) const { + LinalgOp linalgOp = dyn_cast(op); + if (!linalgOp) + return failure(); + if (failed(marker.checkAndNotify(rewriter, linalgOp))) + return failure(); + if (!linalgOp.hasBufferSemantics()) + return failure(); + + Optional tiledAndFusedOps = tileAndFuseLinalgOps( + rewriter, op, dependenceGraph, tilingOptions, fusionOptions); + if (!tiledAndFusedOps) + return failure(); + marker.replaceLinalgMarker(rewriter, tiledAndFusedOps->op.getOperation()); + for (auto fusedOp : tiledAndFusedOps->fusedProducers) { + fusedOpMarker.replaceLinalgMarker(rewriter, fusedOp.getOperation()); + } + for (auto origProducerOp : tiledAndFusedOps->originalProducers) + originalOpMarker.replaceLinalgMarker(rewriter, + origProducerOp.getOperation()); + rewriter.updateRootInPlace( + op, [&]() { originalOpMarker.replaceLinalgMarker(rewriter, op); }); + return success(); +} + /// Linalg base interchange pattern. mlir::linalg::LinalgBaseInterchangePattern::LinalgBaseInterchangePattern( StringRef opName, MLIRContext *context, diff --git a/mlir/test/Dialect/Linalg/fusion-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-pattern.mlir new file mode 100644 index 0000000000000..61e5b746deac7 --- /dev/null +++ b/mlir/test/Dialect/Linalg/fusion-pattern.mlir @@ -0,0 +1,297 @@ +// RUN: mlir-opt %s -test-linalg-fusion-transform-patterns -canonicalize -cse -split-input-file | FileCheck %s + +module { + func @basic_fusion(%arg0: memref, %arg1: memref, + %arg2: memref) { + %cst = constant 0.000000e+00 : f32 + linalg.fill(%arg2, %cst) : memref, f32 + linalg.matmul {__internal_linalg_transform__ = "basic_fusion"} + ins(%arg0, %arg1 : memref, memref) + outs(%arg2 : memref) + return + } +} + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> +// CHECK: func @basic_fusion +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C32:.+]] = constant 32 : index +// CHECK-DAG: %[[C64:.+]] = constant 64 : index +// CHECK-DAG: %[[C16:.+]] = constant 16 : index +// CHECK-DAG: %[[CST:.+]] = constant 0.0{{.*}} : f32 +// CHECK-DAG: linalg.fill(%[[ARG2]], %[[CST]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_original" +// CHECK-DAG: %[[M:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[N:.+]] = dim %[[ARG1]], %[[C1]] +// CHECK: scf.parallel (%[[IV0:.+]], %[[IV1:.+]]) = +// CHECK-SAME: to (%[[M]], %[[N]]) +// CHECK-SAME: step (%[[C32]], %[[C64]]) { +// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] +// CHECK: %[[K:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[SV1:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M]], %[[K]]] +// CHECK: %[[K_2:.+]] = dim %[[ARG1]], %[[C0]] +// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[N]]] +// CHECK: %[[SV2:.+]] = subview %[[ARG1]][0, %[[IV1]]] +// CHECK-SAME: %[[K_2]], %[[TILE_N]] +// CHECK: %[[M_2:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] +// CHECK: %[[N_2:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[N_2]]] +// CHECK: %[[SV3:.+]] = subview %[[ARG2]][%[[IV0]], %[[IV1]]] +// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] +// CHECK: linalg.fill(%[[SV3]], %[[CST]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_producer" +// CHECK: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { +// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]] +// CHECK: %[[SV4:.+]] = subview %[[SV1]][0, %[[IV2]]] +// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] +// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]] +// CHECK: %[[SV5:.+]] = subview %[[SV2]][%[[IV2]], 0] +// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion" +// CHECK-SAME: ins(%[[SV4]], %[[SV5]] +// CHECK-SAME: : memref, memref) +// CHECK-SAME: outs(%[[SV3]] : memref) +// CHECK: } +// CHECK: } +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_original" + +// ----- + +module { + func @rhs_fusion(%arg0: memref, %arg1: memref, + %arg2: memref, %arg3: memref) { + %cst = constant 0.000000e+00 : f32 + linalg.copy(%arg1, %arg2) : memref, memref + linalg.fill(%arg3, %cst) : memref, f32 + linalg.matmul {__internal_linalg_transform__ = "rhs_fusion"} + ins(%arg0, %arg2 : memref, memref) + outs(%arg3 : memref) + return + } +} +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> +// CHECK: func @rhs_fusion +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: memref +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C32:.+]] = constant 32 : index +// CHECK-DAG: %[[C64:.+]] = constant 64 : index +// CHECK-DAG: %[[C16:.+]] = constant 16 : index +// CHECK-DAG: %[[CST:.+]] = constant 0.0{{.*}} : f32 +// CHECK-DAG: linalg.copy(%[[ARG1]], %[[ARG2]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_original" +// CHECK-DAG: %[[N:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK: scf.parallel (%[[IV0:.+]]) = +// CHECK-SAME: (%[[C0]]) to (%[[N]]) step (%[[C64]]) { +// CHECK: %[[K:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[N]]] +// CHECK: %[[SV1:.+]] = subview %[[ARG2]][0, %[[IV0]]] +// CHECK-SAME: [%[[K]], %[[TILE_N]]] +// CHECK: %[[M:.+]] = dim %[[ARG3]], %[[C0]] +// CHECK: %[[N_2:.+]] = dim %[[ARG3]], %[[C1]] +// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[N_2]]] +// CHECK: %[[SV2:.+]] = subview %[[ARG3]][0, %[[IV0]]] +// CHECK-SAME: [%[[M]], %[[TILE_N_2]]] +// CHECK: %[[SV3:.+]] = subview %[[ARG1]][0, %[[IV0]]] +// CHECK-SAME: [%[[K]], %[[TILE_N]]] +// CHECK: linalg.copy(%[[SV3]], %[[SV1]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_producer" +// CHECK-NOT: linalg.fill +// CHECK-DAG: %[[M_2:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[K_2:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: scf.parallel (%[[IV1:.+]]) = +// CHECK-SAME: (%[[C0]]) to (%[[M_2]]) step (%[[C32]]) { +// CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K_2]] step %[[C16]] { +// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[M_2]]] +// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]] +// CHECK: %[[SV4:.+]] = subview %[[ARG0]][%[[IV1]], %[[IV2]]] +// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] +// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]] +// CHECK: %[[SV5:.+]] = subview %[[SV1]][%[[IV2]], 0] +// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] +// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[M]]] +// CHECK: %[[SV6:.+]] = subview %[[SV2]][%[[IV1]], 0] +// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion" +// CHECK-SAME: ins(%[[SV4]], %[[SV5]] +// CHECK-SAME: : memref, memref) +// CHECK-SAME: outs(%[[SV6]] : memref) +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_original" + + +// ----- + +module { + func @two_operand_fusion(%arg0: memref, %arg1: memref, + %arg2: memref, %arg3: memref) { + %cst = constant 0.000000e+00 : f32 + linalg.copy(%arg0, %arg1) : memref, memref + linalg.fill(%arg3, %cst) : memref, f32 + linalg.matmul {__internal_linalg_transform__ = "two_operand_fusion"} + ins(%arg1, %arg2 : memref, memref) + outs(%arg3 : memref) + return + } +} +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)> +// CHECK: func @two_operand_fusion +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: memref +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C32:.+]] = constant 32 : index +// CHECK-DAG: %[[C64:.+]] = constant 64 : index +// CHECK-DAG: %[[C16:.+]] = constant 16 : index +// CHECK-DAG: %[[CST:.+]] = constant 0.0{{.*}} : f32 +// CHECK: linalg.copy(%[[ARG0]], %[[ARG1]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" +// CHECK: linalg.fill(%[[ARG3]], %[[CST]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" +// CHECK-DAG: %[[M:.+]] = dim %[[ARG1]], %[[C0]] +// CHECK: scf.parallel (%[[IV0:.+]]) = +// CHECK-SAME: (%[[C0]]) to (%[[M]]) step (%[[C32]]) { +// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] +// CHECK: %[[K:.+]] = dim %[[ARG1]], %[[C1]] +// CHECK: %[[SV1:.+]] = subview %[[ARG1]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M]], %[[K]]] +// CHECK: %[[M_2:.+]] = dim %[[ARG3]], %[[C0]] +// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] +// CHECK: %[[N:.+]] = dim %[[ARG3]], %[[C1]] +// CHECK: %[[SV2:.+]] = subview %[[ARG3]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M_2]], %[[N]]] +// CHECK: %[[SV3:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M]], %[[K]]] +// CHECK: linalg.copy(%[[SV3]], %[[SV1]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer" +// CHECK: linalg.fill(%[[SV2]], %[[CST]]) +// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer" +// CHECK-DAG: %[[N_2:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK: scf.parallel (%[[IV1:.+]]) = +// CHECK-SAME: (%[[C0]]) to (%[[N_2]]) step (%[[C64]]) { +// CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { +// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K]]] +// CHECK: %[[SV4:.+]] = subview %[[SV1]][0, %[[IV2]]] +// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] +// CHECK: %[[K_2:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K_2]]] +// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]] +// CHECK: %[[SV5:.+]] = subview %[[ARG2]][%[[IV2]], %[[IV1]]] +// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] +// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N]]] +// CHECK: %[[SV6:.+]] = subview %[[SV2]][0, %[[IV1]]] +// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion" +// CHECK-SAME: ins(%[[SV4]], %[[SV5]] +// CHECK-SAME: : memref, memref) +// CHECK-SAME: outs(%[[SV6]] : memref) +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" + +// ----- + +module { + func @matmul_fusion(%arg0: memref, %arg1: memref, + %arg2: memref, %arg3: memref, + %arg4: memref) { + linalg.matmul ins(%arg0, %arg1 : memref, memref) + outs(%arg2 : memref) + linalg.matmul {__internal_linalg_transform__ = "lhs_fusion"} + ins(%arg2, %arg3 : memref, memref) + outs(%arg4 : memref) + return + } +} +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)> +// CHECK: func @matmul_fusion +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: memref +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: memref +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[C1:.+]] = constant 1 : index +// CHECK-DAG: %[[C32:.+]] = constant 32 : index +// CHECK-DAG: %[[C64:.+]] = constant 64 : index +// CHECK-DAG: %[[C16:.+]] = constant 16 : index +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_original" +// CHECK-DAG: %[[M:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: scf.parallel (%[[IV0:.+]]) = +// CHECK-SAME: (%[[C0]]) to (%[[M]]) step (%[[C32]]) { +// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] +// CHECK: %[[K2:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK: %[[SV1:.+]] = subview %[[ARG2]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M]], %[[K2]]] +// CHECK: %[[M_2:.+]] = dim %[[ARG4]], %[[C0]] +// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] +// CHECK: %[[N:.+]] = dim %[[ARG4]], %[[C1]] +// CHECK: %[[SV2:.+]] = subview %[[ARG4]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M_2]], %[[N]]] +// CHECK: %[[K1:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[SV3:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK-SAME: [%[[TILE_M]], %[[K1]]] +// CHECK: %[[SV4:.+]] = subview %[[ARG1]][0, 0] [%[[K1]], %[[K2]]] +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer" +// CHECK-SAME: ins(%[[SV3]], %[[SV4]] +// CHECK-SAME: : memref, memref) +// CHECK-SAME: outs(%[[SV1]] : memref) +// CHECK-DAG: %[[N_2:.+]] = dim %[[ARG3]], %[[C1]] +// CHECK: scf.parallel (%[[IV1:.+]]) = +// CHECK-SAME: (%[[C0]]) to (%[[N_2]]) step (%[[C64]]) { +// CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { +// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K]]] +// CHECK: %[[SV6:.+]] = subview %[[SV1]][0, %[[IV2]]] +// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] +// CHECK: %[[K_2:.+]] = dim %[[ARG3]], %[[C0]] +// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K_2]]] +// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]] +// CHECK: %[[SV7:.+]] = subview %[[ARG3]][%[[IV2]], %[[IV1]]] +// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] +// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N]]] +// CHECK: %[[SV8:.+]] = subview %[[SV2]][0, %[[IV1]]] +// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion" +// CHECK-SAME: ins(%[[SV6]], %[[SV7]] +// CHECK-SAME: : memref, memref) +// CHECK-SAME: outs(%[[SV8]] : memref) +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: linalg.matmul +// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_original" diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index 3c82554fa13a3..5bf606209ec2c 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -16,6 +16,7 @@ add_mlir_library(MLIRTestTransforms TestGpuMemoryPromotion.cpp TestGpuParallelLoopMapping.cpp TestInlining.cpp + TestLinalgFusionTransforms.cpp TestLinalgHoisting.cpp TestLinalgTransforms.cpp TestLiveness.cpp diff --git a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp new file mode 100644 index 0000000000000..9a376c548900b --- /dev/null +++ b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp @@ -0,0 +1,112 @@ +//===- TestLinalgFusionTransforms.cpp - Test Linalg fusion patterns -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic for testing Linalg fusion patterns. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; +using namespace mlir::linalg; + +namespace { +struct TestLinalgFusionTransforms + : public PassWrapper { + TestLinalgFusionTransforms() = default; + TestLinalgFusionTransforms(const TestLinalgFusionTransforms &pass) {} + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnFunction() override; +}; +} // namespace + +static void fillFusionPatterns(MLIRContext *context, + const LinalgDependenceGraph &dependenceGraph, + OwningRewritePatternList &patterns) { + patterns.insert>( + context, dependenceGraph, + LinalgTilingOptions() + .setTileSizes({32, 64, 16}) + .setLoopType(LinalgTilingLoopType::ParallelLoops), + LinalgFusionOptions(), + LinalgMarker(Identifier::get("basic_fusion", context), + Identifier::get("after_basic_fusion", context)), + LinalgMarker(ArrayRef(), + Identifier::get("after_basic_fusion_producer", context)), + LinalgMarker(ArrayRef(), + Identifier::get("after_basic_fusion_original", context))); + + patterns.insert>( + context, dependenceGraph, + LinalgTilingOptions() + .setTileSizes({32, 64, 16}) + .setLoopType(LinalgTilingLoopType::ParallelLoops), + LinalgFusionOptions().setIndicesToFuse({0}), + LinalgMarker(Identifier::get("lhs_fusion", context), + Identifier::get("after_lhs_fusion", context)), + LinalgMarker(ArrayRef(), + Identifier::get("after_lhs_fusion_producer", context)), + LinalgMarker(ArrayRef(), + Identifier::get("after_lhs_fusion_original", context))); + + patterns.insert>( + context, dependenceGraph, + LinalgTilingOptions() + .setTileSizes({32, 64, 16}) + .setLoopType(LinalgTilingLoopType::ParallelLoops), + LinalgFusionOptions().setIndicesToFuse({1}), + LinalgMarker(Identifier::get("rhs_fusion", context), + Identifier::get("after_rhs_fusion", context)), + LinalgMarker(ArrayRef(), + Identifier::get("after_rhs_fusion_producer", context)), + LinalgMarker(ArrayRef(), + Identifier::get("after_rhs_fusion_original", context))); + + patterns.insert>( + context, dependenceGraph, + LinalgTilingOptions() + .setTileSizes({32, 64, 16}) + .setLoopType(LinalgTilingLoopType::ParallelLoops), + LinalgFusionOptions().setIndicesToFuse({0, 2}), + LinalgMarker(Identifier::get("two_operand_fusion", context), + Identifier::get("after_two_operand_fusion", context)), + LinalgMarker( + ArrayRef(), + Identifier::get("after_two_operand_fusion_producer", context)), + LinalgMarker( + ArrayRef(), + Identifier::get("after_two_operand_fusion_original", context))); +} + +static void applyFusionPatterns(MLIRContext *context, FuncOp funcOp) { + OwningRewritePatternList fusionPatterns; + Aliases alias; + LinalgDependenceGraph dependenceGraph = + LinalgDependenceGraph::buildDependenceGraph(alias, funcOp); + fillFusionPatterns(context, dependenceGraph, fusionPatterns); + applyPatternsAndFoldGreedily(funcOp, fusionPatterns); +} + +void TestLinalgFusionTransforms::runOnFunction() { + applyFusionPatterns(&getContext(), getFunction()); +} + +namespace mlir { +void registerTestLinalgFusionTransforms() { + PassRegistration testFusionTransformsPass( + "test-linalg-fusion-transform-patterns", + "Test Linalg fusion transformation patterns by applying them greedily."); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index aed8b0ae818b6..0389c70be3d63 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -58,6 +58,7 @@ void registerTestFunc(); void registerTestGpuMemoryPromotionPass(); void registerTestGpuParallelLoopMappingPass(); void registerTestInterfaces(); +void registerTestLinalgFusionTransforms(); void registerTestLinalgHoisting(); void registerTestLinalgTransforms(); void registerTestLivenessPass(); @@ -114,6 +115,7 @@ void registerTestPasses() { registerTestExpandTanhPass(); registerTestGpuMemoryPromotionPass(); registerTestInterfaces(); + registerTestLinalgFusionTransforms(); registerTestLinalgHoisting(); registerTestLinalgTransforms(); registerTestLivenessPass(); From 7475bd5411a3f62a7860db09a5bcf1fc147c43d6 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 30 Sep 2020 03:02:41 -0700 Subject: [PATCH 211/544] [Msan] Add ptsname, ptsname_r interceptors Reviewed By: eugenis, MaskRay Differential Revision: https://reviews.llvm.org/D88547 --- .../sanitizer_common_interceptors.inc | 30 +++++++++++++++++++ .../sanitizer_platform_interceptors.h | 2 ++ .../TestCases/Linux/ptsname.c | 27 +++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/ptsname.c diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 80035349b659e..4ea35ae368ede 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -4867,6 +4867,34 @@ INTERCEPTOR(char *, tmpnam_r, char *s) { #define INIT_TMPNAM_R #endif +#if SANITIZER_INTERCEPT_PTSNAME +INTERCEPTOR(char *, ptsname, int fd) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, ptsname, fd); + char *res = REAL(ptsname)(fd); + if (res != nullptr) + COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1); + return res; +} +#define INIT_PTSNAME COMMON_INTERCEPT_FUNCTION(ptsname); +#else +#define INIT_PTSNAME +#endif + +#if SANITIZER_INTERCEPT_PTSNAME_R +INTERCEPTOR(int, ptsname_r, int fd, char *name, SIZE_T namesize) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, ptsname_r, fd, name, namesize); + int res = REAL(ptsname_r)(fd, name, namesize); + if (res == 0) + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, REAL(strlen)(name) + 1); + return res; +} +#define INIT_PTSNAME_R COMMON_INTERCEPT_FUNCTION(ptsname_r); +#else +#define INIT_PTSNAME_R +#endif + #if SANITIZER_INTERCEPT_TTYNAME INTERCEPTOR(char *, ttyname, int fd) { void *ctx; @@ -10166,6 +10194,8 @@ static void InitializeCommonInterceptors() { INIT_PTHREAD_BARRIERATTR_GETPSHARED; INIT_TMPNAM; INIT_TMPNAM_R; + INIT_PTSNAME; + INIT_PTSNAME_R; INIT_TTYNAME; INIT_TTYNAME_R; INIT_TEMPNAM; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index c28ac55136692..c6138e785afe1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -384,6 +384,8 @@ #define SANITIZER_INTERCEPT_THR_EXIT SI_FREEBSD #define SANITIZER_INTERCEPT_TMPNAM SI_POSIX #define SANITIZER_INTERCEPT_TMPNAM_R SI_LINUX_NOT_ANDROID || SI_SOLARIS +#define SANITIZER_INTERCEPT_PTSNAME SI_LINUX +#define SANITIZER_INTERCEPT_PTSNAME_R SI_LINUX #define SANITIZER_INTERCEPT_TTYNAME SI_POSIX #define SANITIZER_INTERCEPT_TTYNAME_R SI_POSIX #define SANITIZER_INTERCEPT_TEMPNAM SI_POSIX diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/ptsname.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/ptsname.c new file mode 100644 index 0000000000000..8fa1d37012968 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/ptsname.c @@ -0,0 +1,27 @@ +// RUN: %clang %s -o %t && %run %t + +#define _GNU_SOURCE +#define _XOPEN_SOURCE 600 + +#include +#include +#include +#include +#include + +int main() { + int pt = posix_openpt(O_NOCTTY); + if (pt == -1) + return 0; + char *s = ptsname(pt); + assert(s); + assert(strstr(s, "/dev")); + + char buff[1000] = {}; + int r = ptsname_r(pt, buff, sizeof(buff)); + assert(!r); + assert(strstr(buff, "/dev")); + + close(pt); + return 0; +} From 722d792499a4b60dd582f870cbdfb572897906b4 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 30 Sep 2020 15:01:33 -0700 Subject: [PATCH 212/544] [AMDGPU] Reorganize VOP3P encoding This changes width of encoding and opcode fields to match the documentation. Differential Revision: https://reviews.llvm.org/D88619 --- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 174 ++++++++++---------- llvm/lib/Target/AMDGPU/VOPInstructions.td | 16 +- 2 files changed, 95 insertions(+), 95 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 446e87ab3fc98..393fc8b09d446 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -418,7 +418,7 @@ def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F3 def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">; def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">; -multiclass VOP3P_Real_vi op> { +multiclass VOP3P_Real_vi op> { def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, VOP3Pe (NAME).Pfl> { let AssemblerPredicate = HasVOP3PInsts; @@ -426,7 +426,7 @@ multiclass VOP3P_Real_vi op> { } } -multiclass VOP3P_Real_MAI op> { +multiclass VOP3P_Real_MAI op> { def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, VOP3Pe_MAI (NAME).Pfl> { let AssemblerPredicate = HasMAIInsts; @@ -434,32 +434,32 @@ multiclass VOP3P_Real_MAI op> { } } -defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>; -defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>; -defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>; -defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>; -defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>; -defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>; -defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>; -defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>; -defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>; -defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>; - -defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>; -defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>; -defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>; -defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>; -defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>; -defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>; -defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>; -defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>; -defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>; +defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>; +defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>; +defm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>; +defm V_PK_SUB_I16 : VOP3P_Real_vi <0x03>; +defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>; +defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>; +defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>; +defm V_PK_MAX_I16 : VOP3P_Real_vi <0x07>; +defm V_PK_MIN_I16 : VOP3P_Real_vi <0x08>; +defm V_PK_MAD_U16 : VOP3P_Real_vi <0x09>; + +defm V_PK_ADD_U16 : VOP3P_Real_vi <0x0a>; +defm V_PK_SUB_U16 : VOP3P_Real_vi <0x0b>; +defm V_PK_MAX_U16 : VOP3P_Real_vi <0x0c>; +defm V_PK_MIN_U16 : VOP3P_Real_vi <0x0d>; +defm V_PK_FMA_F16 : VOP3P_Real_vi <0x0e>; +defm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>; +defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>; +defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>; +defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>; let SubtargetPredicate = HasMadMixInsts in { -defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>; -defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>; -defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>; +defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>; +defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>; +defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>; } let SubtargetPredicate = HasFmaMixInsts in { @@ -467,54 +467,54 @@ let DecoderNamespace = "GFX9_DL" in { // The mad_mix instructions were renamed and their behaviors changed, // but the opcode stayed the same so we need to put these in a // different DecoderNamespace to avoid the ambiguity. -defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x3a0>; -defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x3a1>; -defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>; +defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>; +defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>; +defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>; } } let SubtargetPredicate = HasDot2Insts in { -defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>; -defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>; -defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>; -defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>; -defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>; +defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>; +defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>; +defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>; +defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>; +defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>; } // End SubtargetPredicate = HasDot2Insts let SubtargetPredicate = HasDot1Insts in { -defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>; -defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>; +defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>; +defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>; } // End SubtargetPredicate = HasDot1Insts let SubtargetPredicate = HasMAIInsts in { -defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x3d8>; -defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>; -defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x3c0>; -defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x3c1>; -defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x3c2>; -defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x3c4>; -defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x3c5>; -defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x3c8>; -defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x3c9>; -defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x3ca>; -defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x3cc>; -defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>; -defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x3d0>; -defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x3d1>; -defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x3d2>; -defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x3d4>; -defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x3d5>; -defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>; -defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>; -defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x3eb>; -defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>; -defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>; +defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>; +defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>; +defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x40>; +defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x41>; +defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x42>; +defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x44>; +defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x45>; +defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x48>; +defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x49>; +defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x4a>; +defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x4c>; +defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x4d>; +defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x50>; +defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x51>; +defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x52>; +defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x54>; +defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x55>; +defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x68>; +defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x69>; +defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x6b>; +defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x6c>; +defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x6d>; } // End SubtargetPredicate = HasMAIInsts @@ -523,48 +523,48 @@ defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>; //===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { - multiclass VOP3P_Real_gfx10 op> { + multiclass VOP3P_Real_gfx10 op> { def _gfx10 : VOP3P_Real(NAME), SIEncodingFamily.GFX10>, VOP3Pe_gfx10 (NAME).Pfl>; } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>; -defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>; -defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>; -defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>; -defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>; -defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>; -defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>; -defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>; -defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>; -defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>; -defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>; -defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>; -defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>; -defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>; -defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>; -defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>; -defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>; -defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>; -defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>; -defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>; -defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>; -defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>; +defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x00>; +defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x01>; +defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x02>; +defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x03>; +defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>; +defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>; +defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>; +defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x07>; +defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x08>; +defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x09>; +defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x0a>; +defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x0b>; +defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x0c>; +defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x0d>; +defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x0e>; +defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x0f>; +defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x10>; +defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x11>; +defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x12>; +defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x20>; +defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x21>; +defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>; let SubtargetPredicate = HasDot2Insts in { -defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>; -defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>; -defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>; -defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>; -defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>; +defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>; +defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>; +defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>; +defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>; +defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>; } // End SubtargetPredicate = HasDot2Insts let SubtargetPredicate = HasDot1Insts in { -defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>; -defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>; +defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>; +defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>; } // End SubtargetPredicate = HasDot1Insts diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index ab1915de0c734..b27a1d31863df 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -296,7 +296,7 @@ class VOP3be : Enc64 { let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); } -class VOP3Pe op, VOPProfile P> : Enc64 { +class VOP3Pe op, VOPProfile P> : Enc64 { bits<8> vdst; // neg, neg_hi, op_sel put in srcN_modifiers bits<4> src0_modifiers; @@ -320,8 +320,8 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{15} = !if(P.HasClamp, clamp{0}, 0); - let Inst{25-16} = op; - let Inst{31-26} = 0x34; //encoding + let Inst{22-16} = op; + let Inst{31-23} = 0x1a7; //encoding let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); @@ -332,7 +332,7 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) } -class VOP3Pe_MAI op, VOPProfile P> : Enc64 { +class VOP3Pe_MAI op, VOPProfile P> : Enc64 { bits<8> vdst; bits<10> src0; bits<10> src1; @@ -349,8 +349,8 @@ class VOP3Pe_MAI op, VOPProfile P> : Enc64 { let Inst{15} = !if(P.HasClamp, clamp{0}, 0); - let Inst{25-16} = op; - let Inst{31-26} = 0x34; //encoding + let Inst{22-16} = op; + let Inst{31-23} = 0x1a7; //encoding let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0); let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); @@ -362,8 +362,8 @@ class VOP3Pe_MAI op, VOPProfile P> : Enc64 { } -class VOP3Pe_gfx10 op, VOPProfile P> : VOP3Pe { - let Inst{31-26} = 0x33; //encoding +class VOP3Pe_gfx10 op, VOPProfile P> : VOP3Pe { + let Inst{31-23} = 0x198; //encoding } class VOP3be_gfx6_gfx7 op, VOPProfile p> : VOP3be

{ From 5519e4da83d1abc66620334692394749eceb0e50 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 30 Sep 2020 14:55:51 -0700 Subject: [PATCH 213/544] Re-land "[PDB] Merge types in parallel when using ghashing" Stored Error objects have to be checked, even if they are success values. This reverts commit 8d250ac3cd48d0f17f9314685a85e77895c05351. Relands commit 49b3459930655d879b2dc190ff8fe11c38a8be5f.. Original commit message: ----------------------------------------- This makes type merging much faster (-24% on chrome.dll) when multiple threads are available, but it slightly increases the time to link (+10%) when /threads:1 is passed. With only one more thread, the new type merging is faster (-11%). The output PDB should be identical to what it was before this change. To give an idea, here is the /time output placed side by side: BEFORE | AFTER Input File Reading: 956 ms | 968 ms Code Layout: 258 ms | 190 ms Commit Output File: 6 ms | 7 ms PDB Emission (Cumulative): 6691 ms | 4253 ms Add Objects: 4341 ms | 2927 ms Type Merging: 2814 ms | 1269 ms -55%! Symbol Merging: 1509 ms | 1645 ms Publics Stream Layout: 111 ms | 112 ms TPI Stream Layout: 764 ms | 26 ms trivial Commit to Disk: 1322 ms | 1036 ms -300ms ----------------------------------------- -------- Total Link Time: 8416 ms 5882 ms -30% overall The main source of the additional overhead in the single-threaded case is the need to iterate all .debug$T sections up front to check which type records should go in the IPI stream. See fillIsItemIndexFromDebugT. With changes to the .debug$H section, we could pre-calculate this info and eliminate the need to do this walk up front. That should restore single-threaded performance back to what it was before this change. This change will cause LLD to be much more parallel than it used to, and for users who do multiple links in parallel, it could regress performance. However, when the user is only doing one link, it's a huge improvement. In the future, we can use NT worker threads to avoid oversaturating the machine with work, but for now, this is such an improvement for the single-link use case that I think we should land this as is. Algorithm ---------- Before this change, we essentially used a DenseMap to check if a type has already been seen, and if it hasn't been seen, insert it now and use the next available type index for it in the destination type stream. DenseMap does not support concurrent insertion, and even if it did, the linker must be deterministic: it cannot produce different PDBs by using different numbers of threads. The output type stream must be in the same order regardless of the order of hash table insertions. In order to create a hash table that supports concurrent insertion, the table cells must be small enough that they can be updated atomically. The algorithm I used for updating the table using linear probing is described in this paper, "Concurrent Hash Tables: Fast and General(?)!": https://dl.acm.org/doi/10.1145/3309206 The GHashCell in this change is essentially a pair of 32-bit integer indices: . The sourceIndex is the index of the TpiSource object, and it represents an input type stream. The typeIndex is the index of the type in the stream. Together, we have something like a ragged 2D array of ghashes, which can be looked up as: tpiSources[tpiSrcIndex]->ghashes[typeIndex] By using these side tables, we can omit the key data from the hash table, and keep the table cell small. There is a cost to this: resolving hash table collisions requires many more loads than simply looking at the key in the same cache line as the insertion position. However, most supported platforms should have a 64-bit CAS operation to update the cell atomically. To make the result of concurrent insertion deterministic, the cell payloads must have a priority function. Defining one is pretty straightforward: compare the two 32-bit numbers as a combined 64-bit number. This means that types coming from inputs earlier on the command line have a higher priority and are more likely to appear earlier in the final PDB type stream than types from an input appearing later on the link line. After table insertion, the non-empty cells in the table can be copied out of the main table and sorted by priority to determine the ordering of the final type index stream. At this point, item and type records must be separated, either by sorting or by splitting into two arrays, and I chose sorting. This is why the GHashCell must contain the isItem bit. Once the final PDB TPI stream ordering is known, we need to compute a mapping from source type index to PDB type index. To avoid starting over from scratch and looking up every type again by its ghash, we save the insertion position of every hash table insertion during the first insertion phase. Because the table does not support rehashing, the insertion position is stable. Using the array of insertion positions indexed by source type index, we can replace the source type indices in the ghash table cells with the PDB type indices. Once the table cells have been updated to contain PDB type indices, the mapping for each type source can be computed in parallel. Simply iterate the list of cell positions and replace them with the PDB type index, since the insertion positions are no longer needed. Once we have a source to destination type index mapping for every type source, there are no more data dependencies. We know which type records are "unique" (not duplicates), and what their final type indices will be. We can do the remapping in parallel, and accumulate type sizes and type hashes in parallel by type source. Lastly, TPI stream layout must be done serially. Accumulate all the type records, sizes, and hashes, and add them to the PDB. Differential Revision: https://reviews.llvm.org/D87805 --- lld/COFF/DebugTypes.cpp | 847 ++++++++++++++++-- lld/COFF/DebugTypes.h | 116 ++- lld/COFF/Driver.cpp | 2 +- lld/COFF/PDB.cpp | 178 ++-- lld/COFF/PDB.h | 6 + lld/COFF/TypeMerger.h | 30 +- lld/include/lld/Common/ErrorHandler.h | 7 + lld/test/COFF/pdb-global-hashes.test | 2 +- lld/test/COFF/pdb-procid-remapping.test | 8 +- lld/test/COFF/pdb-type-server-missing.yaml | 1 + lld/test/COFF/pdb-type-server-simple.test | 9 +- lld/test/COFF/precomp-link.test | 10 +- lld/test/COFF/s_udt.s | 2 + .../llvm/DebugInfo/CodeView/TypeHashing.h | 12 +- .../llvm/DebugInfo/CodeView/TypeIndex.h | 11 +- .../DebugInfo/PDB/Native/TpiStreamBuilder.h | 9 +- llvm/lib/DebugInfo/CodeView/RecordName.cpp | 8 +- .../DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 62 +- 18 files changed, 1084 insertions(+), 236 deletions(-) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 46959334e6676..557bdd9c04b3d 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -10,9 +10,12 @@ #include "Chunks.h" #include "Driver.h" #include "InputFiles.h" +#include "PDB.h" #include "TypeMerger.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Timer.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" @@ -20,7 +23,10 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" using namespace llvm; @@ -54,6 +60,10 @@ class TypeServerSource : public TpiSource { } Error mergeDebugT(TypeMerger *m) override; + + void loadGHashes() override; + void remapTpiWithGHashes(GHashState *g) override; + bool isDependency() const override { return true; } PDBInputFile *pdbInputFile = nullptr; @@ -73,22 +83,29 @@ class TypeServerIpiSource : public TpiSource { friend class TypeServerSource; - // IPI merging is handled in TypeServerSource::mergeDebugT, since it depends - // directly on type merging. + // All of the TpiSource methods are no-ops. The parent TypeServerSource + // handles both TPI and IPI. Error mergeDebugT(TypeMerger *m) override { return Error::success(); } - + void loadGHashes() override {} + void remapTpiWithGHashes(GHashState *g) override {} bool isDependency() const override { return true; } }; // This class represents the debug type stream of an OBJ file that depends on a // PDB type server (see TypeServerSource). class UseTypeServerSource : public TpiSource { + Expected getTypeServerSource(); + public: UseTypeServerSource(ObjFile *f, TypeServer2Record ts) : TpiSource(UsingPDB, f), typeServerDependency(ts) {} Error mergeDebugT(TypeMerger *m) override; + // No need to load ghashes from /Zi objects. + void loadGHashes() override {} + void remapTpiWithGHashes(GHashState *g) override; + // Information about the PDB type server dependency, that needs to be loaded // in before merging this OBJ. TypeServer2Record typeServerDependency; @@ -110,6 +127,8 @@ class PrecompSource : public TpiSource { toString(it.first->second->file) + " and " + toString(file) + ")"); } + void loadGHashes() override; + bool isDependency() const override { return true; } static std::map mappings; @@ -124,20 +143,49 @@ class UsePrecompSource : public TpiSource { Error mergeDebugT(TypeMerger *m) override; + void loadGHashes() override; + void remapTpiWithGHashes(GHashState *g) override; + +private: + Error mergeInPrecompHeaderObj(); + +public: // Information about the Precomp OBJ dependency, that needs to be loaded in // before merging this OBJ. PrecompRecord precompDependency; }; } // namespace -static std::vector gc; +std::vector TpiSource::instances; +ArrayRef TpiSource::dependencySources; +ArrayRef TpiSource::objectSources; -TpiSource::TpiSource(TpiKind k, ObjFile *f) : kind(k), file(f) { - gc.push_back(this); +TpiSource::TpiSource(TpiKind k, ObjFile *f) + : kind(k), tpiSrcIdx(instances.size()), file(f) { + instances.push_back(this); } // Vtable key method. -TpiSource::~TpiSource() = default; +TpiSource::~TpiSource() { + // Silence any assertions about unchecked errors. + consumeError(std::move(typeMergingError)); +} + +void TpiSource::sortDependencies() { + // Order dependencies first, but preserve the existing order. + std::vector deps; + std::vector objs; + for (TpiSource *s : instances) + (s->isDependency() ? deps : objs).push_back(s); + uint32_t numDeps = deps.size(); + uint32_t numObjs = objs.size(); + instances = std::move(deps); + instances.insert(instances.end(), objs.begin(), objs.end()); + for (uint32_t i = 0, e = instances.size(); i < e; ++i) + instances[i]->tpiSrcIdx = i; + dependencySources = makeArrayRef(instances.data(), numDeps); + objectSources = makeArrayRef(instances.data() + numDeps, numObjs); +} TpiSource *lld::coff::makeTpiSource(ObjFile *file) { return make(TpiSource::Regular, file); @@ -165,14 +213,68 @@ TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file, return make(file, precomp); } -void TpiSource::forEachSource(llvm::function_ref fn) { - for_each(gc, fn); -} - std::map TypeServerSource::mappings; std::map PrecompSource::mappings; +bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const { + if (ti.isSimple()) + return true; + + // This can be an item index or a type index. Choose the appropriate map. + ArrayRef tpiOrIpiMap = + (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap; + if (ti.toArrayIndex() >= tpiOrIpiMap.size()) + return false; + ti = tpiOrIpiMap[ti.toArrayIndex()]; + return true; +} + +void TpiSource::remapRecord(MutableArrayRef rec, + ArrayRef typeRefs) { + MutableArrayRef contents = rec.drop_front(sizeof(RecordPrefix)); + for (const TiReference &ref : typeRefs) { + unsigned byteSize = ref.Count * sizeof(TypeIndex); + if (contents.size() < ref.Offset + byteSize) + fatal("symbol record too short"); + + MutableArrayRef indices( + reinterpret_cast(contents.data() + ref.Offset), ref.Count); + for (TypeIndex &ti : indices) { + if (!remapTypeIndex(ti, ref.Kind)) { + if (config->verbose) { + uint16_t kind = + reinterpret_cast(rec.data())->RecordKind; + StringRef fname = file ? file->getName() : ""; + log("failed to remap type index in record of kind 0x" + + utohexstr(kind) + " in " + fname + " with bad " + + (ref.Kind == TiRefKind::IndexRef ? "item" : "type") + + " index 0x" + utohexstr(ti.getIndex())); + } + ti = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + } + } +} + +void TpiSource::remapTypesInTypeRecord(MutableArrayRef rec) { + // TODO: Handle errors similar to symbols. + SmallVector typeRefs; + discoverTypeIndices(CVType(rec), typeRefs); + remapRecord(rec, typeRefs); +} + +bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef rec) { + // Discover type index references in the record. Skip it if we don't + // know where they are. + SmallVector typeRefs; + if (!discoverTypeIndicesInSymbol(rec, typeRefs)) + return false; + remapRecord(rec, typeRefs); + return true; +} + // A COFF .debug$H section is currently a clang extension. This function checks // if a .debug$H section is in a format that we expect / understand, so that we // can ignore any sections which are coincidentally also named .debug$H but do @@ -203,7 +305,6 @@ static Optional> getDebugH(ObjFile *file) { static ArrayRef getHashesFromDebugH(ArrayRef debugH) { assert(canUseDebugH(debugH)); - debugH = debugH.drop_front(sizeof(object::debug_h_header)); uint32_t count = debugH.size() / sizeof(GloballyHashedType); return {reinterpret_cast(debugH.data()), count}; @@ -211,32 +312,17 @@ getHashesFromDebugH(ArrayRef debugH) { // Merge .debug$T for a generic object file. Error TpiSource::mergeDebugT(TypeMerger *m) { + assert(!config->debugGHashes && + "use remapTpiWithGHashes when ghash is enabled"); + CVTypeArray types; BinaryStreamReader reader(file->debugTypes, support::little); cantFail(reader.readArray(types, reader.getLength())); - if (config->debugGHashes) { - ArrayRef hashes; - std::vector ownedHashes; - if (Optional> debugH = getDebugH(file)) - hashes = getHashesFromDebugH(*debugH); - else { - ownedHashes = GloballyHashedType::hashTypes(types); - hashes = ownedHashes; - } - - if (auto err = mergeTypeAndIdRecords(m->globalIDTable, m->globalTypeTable, - indexMapStorage, types, hashes, - file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); - } else { - if (auto err = - mergeTypeAndIdRecords(m->idTable, m->typeTable, indexMapStorage, - types, file->pchSignature)) - fatal("codeview::mergeTypeAndIdRecords failed: " + - toString(std::move(err))); - } + if (auto err = mergeTypeAndIdRecords( + m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature)) + fatal("codeview::mergeTypeAndIdRecords failed: " + + toString(std::move(err))); // In an object, there is only one mapping for both types and items. tpiMap = indexMapStorage; @@ -267,6 +353,9 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { // Merge types from a type server PDB. Error TypeServerSource::mergeDebugT(TypeMerger *m) { + assert(!config->debugGHashes && + "use remapTpiWithGHashes when ghash is enabled"); + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); Expected expectedTpi = pdbFile.getPDBTpiStream(); if (auto e = expectedTpi.takeError()) @@ -279,45 +368,18 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { maybeIpi = &*expectedIpi; } - if (config->debugGHashes) { - // PDBs do not actually store global hashes, so when merging a type server - // PDB we have to synthesize global hashes. To do this, we first synthesize - // global hashes for the TPI stream, since it is independent, then we - // synthesize hashes for the IPI stream, using the hashes for the TPI stream - // as inputs. - auto tpiHashes = GloballyHashedType::hashTypes(expectedTpi->typeArray()); - Optional endPrecomp; - // Merge TPI first, because the IPI stream will reference type indices. - if (auto err = - mergeTypeRecords(m->globalTypeTable, indexMapStorage, - expectedTpi->typeArray(), tpiHashes, endPrecomp)) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - auto ipiHashes = - GloballyHashedType::hashIds(maybeIpi->typeArray(), tpiHashes); - if (auto err = - mergeIdRecords(m->globalIDTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray(), ipiHashes)) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; - } - } else { - // Merge TPI first, because the IPI stream will reference type indices. - if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, - expectedTpi->typeArray())) - fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); - tpiMap = indexMapStorage; - - // Merge IPI. - if (maybeIpi) { - if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, - maybeIpi->typeArray())) - fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); - ipiMap = ipiSrc->indexMapStorage; - } + // Merge TPI first, because the IPI stream will reference type indices. + if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage, + expectedTpi->typeArray())) + fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err))); + tpiMap = indexMapStorage; + + // Merge IPI. + if (maybeIpi) { + if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage, + maybeIpi->typeArray())) + fatal("codeview::mergeIdRecords failed: " + toString(std::move(err))); + ipiMap = ipiSrc->indexMapStorage; } if (config->showSummary) { @@ -337,7 +399,7 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { return Error::success(); } -Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { +Expected UseTypeServerSource::getTypeServerSource() { const codeview::GUID &tsId = typeServerDependency.getGuid(); StringRef tsPath = typeServerDependency.getName(); @@ -357,8 +419,15 @@ Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { tsSrc = (TypeServerSource *)pdb->debugTypesObj; } + return tsSrc; +} - pdb::PDBFile &pdbSession = tsSrc->pdbInputFile->session->getPDBFile(); +Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { + Expected tsSrc = getTypeServerSource(); + if (!tsSrc) + return tsSrc.takeError(); + + pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile(); auto expectedInfo = pdbSession.getPDBInfoStream(); if (!expectedInfo) return expectedInfo.takeError(); @@ -368,12 +437,12 @@ Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { // must match the GUID specified in the TypeServer2 record. if (expectedInfo->getGuid() != typeServerDependency.getGuid()) return createFileError( - tsPath, + typeServerDependency.getName(), make_error(pdb::pdb_error_code::signature_out_of_date)); // Reuse the type index map of the type server. - tpiMap = tsSrc->tpiMap; - ipiMap = tsSrc->ipiMap; + tpiMap = (*tsSrc)->tpiMap; + ipiMap = (*tsSrc)->ipiMap; return Error::success(); } @@ -399,26 +468,28 @@ static PrecompSource *findObjByName(StringRef fileNameOnly) { return nullptr; } -static Expected findPrecompMap(ObjFile *file, - PrecompRecord &pr) { +static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) { // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, // the paths embedded in the OBJs are in the Windows format. SmallString<128> prFileName = sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows); - PrecompSource *precomp; auto it = PrecompSource::mappings.find(pr.getSignature()); if (it != PrecompSource::mappings.end()) { - precomp = it->second; - } else { - // Lookup by name - precomp = findObjByName(prFileName); + return it->second; } + // Lookup by name + return findObjByName(prFileName); +} + +static Expected findPrecompMap(ObjFile *file, + PrecompRecord &pr) { + PrecompSource *precomp = findPrecompSource(file, pr); if (!precomp) return createFileError( - prFileName, + pr.getPrecompFilePath(), make_error(pdb::pdb_error_code::no_matching_pch)); if (pr.getSignature() != file->pchSignature) @@ -437,11 +508,8 @@ static Expected findPrecompMap(ObjFile *file, /// Merges a precompiled headers TPI map into the current TPI map. The /// precompiled headers object will also be loaded and remapped in the /// process. -static Error -mergeInPrecompHeaderObj(ObjFile *file, - SmallVectorImpl &indexMapStorage, - PrecompRecord &precomp) { - auto e = findPrecompMap(file, precomp); +Error UsePrecompSource::mergeInPrecompHeaderObj() { + auto e = findPrecompMap(file, precompDependency); if (!e) return e.takeError(); @@ -449,11 +517,17 @@ mergeInPrecompHeaderObj(ObjFile *file, if (precompSrc->tpiMap.empty()) return Error::success(); - assert(precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex); - assert(precomp.getTypesCount() <= precompSrc->tpiMap.size()); + assert(precompDependency.getStartTypeIndex() == + TypeIndex::FirstNonSimpleIndex); + assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size()); // Use the previously remapped index map from the precompiled headers. indexMapStorage.append(precompSrc->tpiMap.begin(), - precompSrc->tpiMap.begin() + precomp.getTypesCount()); + precompSrc->tpiMap.begin() + + precompDependency.getTypesCount()); + + if (config->debugGHashes) + funcIdToType = precompSrc->funcIdToType; // FIXME: Save copy + return Error::success(); } @@ -462,8 +536,7 @@ Error UsePrecompSource::mergeDebugT(TypeMerger *m) { // precompiled headers object (/Yc) first. Some type indices in the current // object are referencing data in the precompiled headers object, so we need // both to be loaded. - if (Error e = - mergeInPrecompHeaderObj(file, indexMapStorage, precompDependency)) + if (Error e = mergeInPrecompHeaderObj()) return e; return TpiSource::mergeDebugT(m); @@ -478,7 +551,587 @@ uint32_t TpiSource::countPrecompObjs() { } void TpiSource::clear() { - gc.clear(); + // Clean up any owned ghash allocations. + clearGHashes(); + TpiSource::instances.clear(); TypeServerSource::mappings.clear(); PrecompSource::mappings.clear(); } + +//===----------------------------------------------------------------------===// +// Parellel GHash type merging implementation. +//===----------------------------------------------------------------------===// + +void TpiSource::loadGHashes() { + if (Optional> debugH = getDebugH(file)) { + ghashes = getHashesFromDebugH(*debugH); + ownedGHashes = false; + } else { + CVTypeArray types; + BinaryStreamReader reader(file->debugTypes, support::little); + cantFail(reader.readArray(types, reader.getLength())); + assignGHashesFromVector(GloballyHashedType::hashTypes(types)); + } + + fillIsItemIndexFromDebugT(); +} + +// Copies ghashes from a vector into an array. These are long lived, so it's +// worth the time to copy these into an appropriately sized vector to reduce +// memory usage. +void TpiSource::assignGHashesFromVector( + std::vector &&hashVec) { + GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()]; + memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType)); + ghashes = makeArrayRef(hashes, hashVec.size()); + ownedGHashes = true; +} + +// Faster way to iterate type records. forEachTypeChecked is faster than +// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. +static void forEachTypeChecked(ArrayRef types, + function_ref fn) { + checkError( + forEachCodeViewRecord(types, [fn](const CVType &ty) -> Error { + fn(ty); + return Error::success(); + })); +} + +// Walk over file->debugTypes and fill in the isItemIndex bit vector. +// TODO: Store this information in .debug$H so that we don't have to recompute +// it. This is the main bottleneck slowing down parallel ghashing with one +// thread over single-threaded ghashing. +void TpiSource::fillIsItemIndexFromDebugT() { + uint32_t index = 0; + isItemIndex.resize(ghashes.size()); + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + if (isIdRecord(ty.kind())) + isItemIndex.set(index); + ++index; + }); +} + +void TpiSource::mergeTypeRecord(CVType ty) { + // Decide if the merged type goes into TPI or IPI. + bool isItem = isIdRecord(ty.kind()); + MergedInfo &merged = isItem ? mergedIpi : mergedTpi; + + // Copy the type into our mutable buffer. + assert(ty.length() <= codeview::MaxRecordLength); + size_t offset = merged.recs.size(); + size_t newSize = alignTo(ty.length(), 4); + merged.recs.resize(offset + newSize); + auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize); + memcpy(newRec.data(), ty.data().data(), newSize); + + // Fix up the record prefix and padding bytes if it required resizing. + if (newSize != ty.length()) { + reinterpret_cast(newRec.data())->RecordLen = newSize - 2; + for (size_t i = ty.length(); i < newSize; ++i) + newRec[i] = LF_PAD0 + (newSize - i); + } + + // Remap the type indices in the new record. + remapTypesInTypeRecord(newRec); + uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); + merged.recSizes.push_back(static_cast(newSize)); + merged.recHashes.push_back(pdbHash); +} + +void TpiSource::mergeUniqueTypeRecords(ArrayRef typeRecords, + TypeIndex beginIndex) { + // Re-sort the list of unique types by index. + if (kind == PDB) + assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end())); + else + llvm::sort(uniqueTypes); + + // Accumulate all the unique types into one buffer in mergedTypes. + uint32_t ghashIndex = 0; + auto nextUniqueIndex = uniqueTypes.begin(); + assert(mergedTpi.recs.empty()); + assert(mergedIpi.recs.empty()); + forEachTypeChecked(typeRecords, [&](const CVType &ty) { + if (nextUniqueIndex != uniqueTypes.end() && + *nextUniqueIndex == ghashIndex) { + mergeTypeRecord(ty); + ++nextUniqueIndex; + } + if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { + bool success = ty.length() >= 12; + TypeIndex srcFuncIdIndex = beginIndex + ghashIndex; + TypeIndex funcId = srcFuncIdIndex; + TypeIndex funcType; + if (success) { + funcType = *reinterpret_cast(&ty.data()[8]); + success &= remapTypeIndex(funcId, TiRefKind::IndexRef); + success &= remapTypeIndex(funcType, TiRefKind::TypeRef); + } + if (success) { + funcIdToType.insert({funcId, funcType}); + } else { + StringRef fname = file ? file->getName() : ""; + warn("corrupt LF_[M]FUNC_ID record 0x" + + utohexstr(srcFuncIdIndex.getIndex()) + " in " + fname); + } + } + ++ghashIndex; + }); + assert(nextUniqueIndex == uniqueTypes.end() && + "failed to merge all desired records"); + assert(uniqueTypes.size() == + mergedTpi.recSizes.size() + mergedIpi.recSizes.size() && + "missing desired record"); +} + +void TpiSource::remapTpiWithGHashes(GHashState *g) { + assert(config->debugGHashes && "ghashes must be enabled"); + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + ipiMap = indexMapStorage; + mergeUniqueTypeRecords(file->debugTypes); + // TODO: Free all unneeded ghash resources now that we have a full index map. +} + +// PDBs do not actually store global hashes, so when merging a type server +// PDB we have to synthesize global hashes. To do this, we first synthesize +// global hashes for the TPI stream, since it is independent, then we +// synthesize hashes for the IPI stream, using the hashes for the TPI stream +// as inputs. +void TypeServerSource::loadGHashes() { + // Don't hash twice. + if (!ghashes.empty()) + return; + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); + + // Hash TPI stream. + Expected expectedTpi = pdbFile.getPDBTpiStream(); + if (auto e = expectedTpi.takeError()) + fatal("Type server does not have TPI stream: " + toString(std::move(e))); + assignGHashesFromVector( + GloballyHashedType::hashTypes(expectedTpi->typeArray())); + isItemIndex.resize(ghashes.size()); + + // Hash IPI stream, which depends on TPI ghashes. + if (!pdbFile.hasPDBIpiStream()) + return; + Expected expectedIpi = pdbFile.getPDBIpiStream(); + if (auto e = expectedIpi.takeError()) + fatal("error retreiving IPI stream: " + toString(std::move(e))); + ipiSrc->assignGHashesFromVector( + GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes)); + + // The IPI stream isItemIndex bitvector should be all ones. + ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size()); + ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size()); +} + +// Flatten discontiguous PDB type arrays to bytes so that we can use +// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from +// type servers is faster than iterating all object files compiled with /Z7 with +// CVTypeArray, which has high overheads due to the virtual interface of +// BinaryStream::readBytes. +static ArrayRef typeArrayToBytes(const CVTypeArray &types) { + BinaryStreamRef stream = types.getUnderlyingStream(); + ArrayRef debugTypes; + checkError(stream.readBytes(0, stream.getLength(), debugTypes)); + return debugTypes; +} + +// Merge types from a type server PDB. +void TypeServerSource::remapTpiWithGHashes(GHashState *g) { + assert(config->debugGHashes && "ghashes must be enabled"); + + // IPI merging depends on TPI, so do TPI first, then do IPI. No need to + // propagate errors, those should've been handled during ghash loading. + pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile(); + pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream()); + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray())); + if (pdbFile.hasPDBIpiStream()) { + pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream()); + ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size()); + ipiSrc->fillMapFromGHashes(g, ipiSrc->indexMapStorage); + ipiMap = ipiSrc->indexMapStorage; + ipiSrc->tpiMap = tpiMap; + ipiSrc->ipiMap = ipiMap; + ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray())); + funcIdToType = ipiSrc->funcIdToType; // FIXME: Save copy + } +} + +void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { + // No remapping to do with /Zi objects. Simply use the index map from the type + // server. Errors should have been reported earlier. Symbols from this object + // will be ignored. + Expected maybeTsSrc = getTypeServerSource(); + if (!maybeTsSrc) { + typeMergingError = + joinErrors(std::move(typeMergingError), maybeTsSrc.takeError()); + return; + } + TypeServerSource *tsSrc = *maybeTsSrc; + tpiMap = tsSrc->tpiMap; + ipiMap = tsSrc->ipiMap; + funcIdToType = tsSrc->funcIdToType; // FIXME: Save copy +} + +void PrecompSource::loadGHashes() { + if (getDebugH(file)) { + warn("ignoring .debug$H section; pch with ghash is not implemented"); + } + + uint32_t ghashIdx = 0; + std::vector hashVec; + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + // Remember the index of the LF_ENDPRECOMP record so it can be excluded from + // the PDB. There must be an entry in the list of ghashes so that the type + // indexes of the following records in the /Yc PCH object line up. + if (ty.kind() == LF_ENDPRECOMP) + endPrecompGHashIdx = ghashIdx; + + hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); + isItemIndex.push_back(isIdRecord(ty.kind())); + ++ghashIdx; + }); + assignGHashesFromVector(std::move(hashVec)); +} + +void UsePrecompSource::loadGHashes() { + PrecompSource *pchSrc = findPrecompSource(file, precompDependency); + if (!pchSrc) + return; + + // To compute ghashes of a /Yu object file, we need to build on the the + // ghashes of the /Yc PCH object. After we are done hashing, discard the + // ghashes from the PCH source so we don't unnecessarily try to deduplicate + // them. + std::vector hashVec = + pchSrc->ghashes.take_front(precompDependency.getTypesCount()); + forEachTypeChecked(file->debugTypes, [&](const CVType &ty) { + hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec)); + isItemIndex.push_back(isIdRecord(ty.kind())); + }); + hashVec.erase(hashVec.begin(), + hashVec.begin() + precompDependency.getTypesCount()); + assignGHashesFromVector(std::move(hashVec)); +} + +void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { + // This object was compiled with /Yu, so process the corresponding + // precompiled headers object (/Yc) first. Some type indices in the current + // object are referencing data in the precompiled headers object, so we need + // both to be loaded. + if (Error e = mergeInPrecompHeaderObj()) { + typeMergingError = joinErrors(std::move(typeMergingError), std::move(e)); + return; + } + + fillMapFromGHashes(g, indexMapStorage); + tpiMap = indexMapStorage; + ipiMap = indexMapStorage; + mergeUniqueTypeRecords(file->debugTypes, + TypeIndex(precompDependency.getStartTypeIndex() + + precompDependency.getTypesCount())); +} + +namespace { +/// A concurrent hash table for global type hashing. It is based on this paper: +/// Concurrent Hash Tables: Fast and General(?)! +/// https://dl.acm.org/doi/10.1145/3309206 +/// +/// This hash table is meant to be used in two phases: +/// 1. concurrent insertions +/// 2. concurrent reads +/// It does not support lookup, deletion, or rehashing. It uses linear probing. +/// +/// The paper describes storing a key-value pair in two machine words. +/// Generally, the values stored in this map are type indices, and we can use +/// those values to recover the ghash key from a side table. This allows us to +/// shrink the table entries further at the cost of some loads, and sidesteps +/// the need for a 128 bit atomic compare-and-swap operation. +/// +/// During insertion, a priority function is used to decide which insertion +/// should be preferred. This ensures that the output is deterministic. For +/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred. +/// +class GHashCell; +struct GHashTable { + GHashCell *table = nullptr; + uint32_t tableSize = 0; + + GHashTable() = default; + ~GHashTable(); + + /// Initialize the table with the given size. Because the table cannot be + /// resized, the initial size of the table must be large enough to contain all + /// inputs, or insertion may not be able to find an empty cell. + void init(uint32_t newTableSize); + + /// Insert the cell with the given ghash into the table. Return the insertion + /// position in the table. It is safe for the caller to store the insertion + /// position because the table cannot be resized. + uint32_t insert(GloballyHashedType ghash, GHashCell newCell); +}; + +/// A ghash table cell for deduplicating types from TpiSources. +class GHashCell { + uint64_t data = 0; + +public: + GHashCell() = default; + + // Construct data most to least significant so that sorting works well: + // - isItem + // - tpiSrcIdx + // - ghashIdx + // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a + // non-zero representation. + GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx) + : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) | + ghashIdx) { + assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure"); + assert(ghashIdx == getGHashIdx() && "round trip failure"); + } + + explicit GHashCell(uint64_t data) : data(data) {} + + // The empty cell is all zeros. + bool isEmpty() const { return data == 0ULL; } + + /// Extract the tpiSrcIdx. + uint32_t getTpiSrcIdx() const { + return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1; + } + + /// Extract the index into the ghash array of the TpiSource. + uint32_t getGHashIdx() const { return (uint32_t)data; } + + bool isItem() const { return data & (1ULL << 63U); } + + /// Get the ghash key for this cell. + GloballyHashedType getGHash() const { + return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()]; + } + + /// The priority function for the cell. The data is stored such that lower + /// tpiSrcIdx and ghashIdx values are preferred, which means that type record + /// from earlier sources are more likely to prevail. + friend inline bool operator<(const GHashCell &l, const GHashCell &r) { + return l.data < r.data; + } +}; +} // namespace + +namespace lld { +namespace coff { +/// This type is just a wrapper around GHashTable with external linkage so it +/// can be used from a header. +struct GHashState { + GHashTable table; +}; +} // namespace coff +} // namespace lld + +GHashTable::~GHashTable() { delete[] table; } + +void GHashTable::init(uint32_t newTableSize) { + table = new GHashCell[newTableSize]; + memset(table, 0, newTableSize * sizeof(GHashCell)); + tableSize = newTableSize; +} + +uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) { + assert(!newCell.isEmpty() && "cannot insert empty cell value"); + + // FIXME: The low bytes of SHA1 have low entropy for short records, which + // type records are. Swap the byte order for better entropy. A better ghash + // won't need this. + uint32_t startIdx = + ByteSwap_64(*reinterpret_cast(&ghash)) % tableSize; + + // Do a linear probe starting at startIdx. + uint32_t idx = startIdx; + while (true) { + // Run a compare and swap loop. There are four cases: + // - cell is empty: CAS into place and return + // - cell has matching key, earlier priority: do nothing, return + // - cell has matching key, later priority: CAS into place and return + // - cell has non-matching key: hash collision, probe next cell + auto *cellPtr = reinterpret_cast *>(&table[idx]); + GHashCell oldCell(cellPtr->load()); + while (oldCell.isEmpty() || oldCell.getGHash() == ghash) { + // Check if there is an existing ghash entry with a higher priority + // (earlier ordering). If so, this is a duplicate, we are done. + if (!oldCell.isEmpty() && oldCell < newCell) + return idx; + // Either the cell is empty, or our value is higher priority. Try to + // compare and swap. If it succeeds, we are done. + if (cellPtr->compare_exchange_weak(oldCell, newCell)) + return idx; + // If the CAS failed, check this cell again. + } + + // Advance the probe. Wrap around to the beginning if we run off the end. + ++idx; + idx = idx == tableSize ? 0 : idx; + if (idx == startIdx) { + // If this becomes an issue, we could mark failure and rehash from the + // beginning with a bigger table. There is no difference between rehashing + // internally and starting over. + report_fatal_error("ghash table is full"); + } + } + llvm_unreachable("left infloop"); +} + +TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc) + : typeTable(alloc), idTable(alloc) {} + +TypeMerger::~TypeMerger() = default; + +void TypeMerger::mergeTypesWithGHash() { + // Load ghashes. Do type servers and PCH objects first. + { + ScopedTimer t1(loadGHashTimer); + parallelForEach(TpiSource::dependencySources, + [&](TpiSource *source) { source->loadGHashes(); }); + parallelForEach(TpiSource::objectSources, + [&](TpiSource *source) { source->loadGHashes(); }); + } + + ScopedTimer t2(mergeGHashTimer); + GHashState ghashState; + + // Estimate the size of hash table needed to deduplicate ghashes. This *must* + // be larger than the number of unique types, or hash table insertion may not + // be able to find a vacant slot. Summing the input types guarantees this, but + // it is a gross overestimate. The table size could be reduced to save memory, + // but it would require implementing rehashing, and this table is generally + // small compared to total memory usage, at eight bytes per input type record, + // and most input type records are larger than eight bytes. + size_t tableSize = 0; + for (TpiSource *source : TpiSource::instances) + tableSize += source->ghashes.size(); + + // Cap the table size so that we can use 32-bit cell indices. Type indices are + // also 32-bit, so this is an inherent PDB file format limit anyway. + tableSize = std::min(size_t(INT32_MAX), tableSize); + ghashState.table.init(static_cast(tableSize)); + + // Insert ghashes in parallel. During concurrent insertion, we cannot observe + // the contents of the hash table cell, but we can remember the insertion + // position. Because the table does not rehash, the position will not change + // under insertion. After insertion is done, the value of the cell can be read + // to retreive the final PDB type index. + parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) { + TpiSource *source = TpiSource::instances[tpiSrcIdx]; + source->indexMapStorage.resize(source->ghashes.size()); + for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) { + if (source->shouldOmitFromPdb(i)) { + source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated); + continue; + } + GloballyHashedType ghash = source->ghashes[i]; + bool isItem = source->isItemIndex.test(i); + uint32_t cellIdx = + ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i)); + + // Store the ghash cell index as a type index in indexMapStorage. Later + // we will replace it with the PDB type index. + source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx); + } + }); + + // Collect all non-empty cells and sort them. This will implicitly assign + // destination type indices, and partition the entries into type records and + // item records. It arranges types in this order: + // - type records + // - source 0, type 0... + // - source 1, type 1... + // - item records + // - source 0, type 1... + // - source 1, type 0... + std::vector entries; + for (const GHashCell &cell : + makeArrayRef(ghashState.table.table, tableSize)) { + if (!cell.isEmpty()) + entries.push_back(cell); + } + parallelSort(entries, std::less()); + log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n", + double(entries.size()) / tableSize, entries.size(), tableSize)); + + // Find out how many type and item indices there are. + auto mid = + std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0)); + assert((mid == entries.end() || mid->isItem()) && + (mid == entries.begin() || !std::prev(mid)->isItem()) && + "midpoint is not midpoint"); + uint32_t numTypes = std::distance(entries.begin(), mid); + uint32_t numItems = std::distance(mid, entries.end()); + log("Tpi record count: " + Twine(numTypes)); + log("Ipi record count: " + Twine(numItems)); + + // Make a list of the "unique" type records to merge for each tpi source. Type + // merging will skip indices not on this list. Store the destination PDB type + // index for these unique types in the tpiMap for each source. The entries for + // non-unique types will be filled in prior to type merging. + for (uint32_t i = 0, e = entries.size(); i < e; ++i) { + auto &cell = entries[i]; + uint32_t tpiSrcIdx = cell.getTpiSrcIdx(); + TpiSource *source = TpiSource::instances[tpiSrcIdx]; + source->uniqueTypes.push_back(cell.getGHashIdx()); + + // Update the ghash table to store the destination PDB type index in the + // table. + uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes; + uint32_t ghashCellIndex = + source->indexMapStorage[cell.getGHashIdx()].toArrayIndex(); + ghashState.table.table[ghashCellIndex] = + GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex); + } + + // In parallel, remap all types. + for_each(TpiSource::dependencySources, [&](TpiSource *source) { + source->remapTpiWithGHashes(&ghashState); + }); + parallelForEach(TpiSource::objectSources, [&](TpiSource *source) { + source->remapTpiWithGHashes(&ghashState); + }); + + TpiSource::clearGHashes(); +} + +/// Given the index into the ghash table for a particular type, return the type +/// index for that type in the output PDB. +static TypeIndex loadPdbTypeIndexFromCell(GHashState *g, + uint32_t ghashCellIdx) { + GHashCell cell = g->table.table[ghashCellIdx]; + return TypeIndex::fromArrayIndex(cell.getGHashIdx()); +} + +// Fill in a TPI or IPI index map using ghashes. For each source type, use its +// ghash to lookup its final type index in the PDB, and store that in the map. +void TpiSource::fillMapFromGHashes(GHashState *g, + SmallVectorImpl &mapToFill) { + for (size_t i = 0, e = ghashes.size(); i < e; ++i) { + TypeIndex fakeCellIndex = indexMapStorage[i]; + if (fakeCellIndex.isSimple()) + mapToFill[i] = fakeCellIndex; + else + mapToFill[i] = loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex()); + } +} + +void TpiSource::clearGHashes() { + for (TpiSource *src : TpiSource::instances) { + if (src->ownedGHashes) + delete[] src->ghashes.data(); + src->ghashes = {}; + src->isItemIndex.clear(); + src->uniqueTypes.clear(); + } +} diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h index f97c0f7617445..17368244e5898 100644 --- a/lld/COFF/DebugTypes.h +++ b/lld/COFF/DebugTypes.h @@ -10,32 +10,37 @@ #define LLD_COFF_DEBUGTYPES_H #include "lld/Common/LLVM.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { namespace codeview { -class PrecompRecord; -class TypeServer2Record; +struct GloballyHashedType; } // namespace codeview namespace pdb { class NativeSession; +class TpiStream; } } // namespace llvm namespace lld { namespace coff { +using llvm::codeview::GloballyHashedType; using llvm::codeview::TypeIndex; class ObjFile; class PDBInputFile; class TypeMerger; +struct GHashState; class TpiSource { public: - enum TpiKind { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; + enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB }; TpiSource(TpiKind k, ObjFile *f); virtual ~TpiSource(); @@ -53,21 +58,97 @@ class TpiSource { /// caller-provided ObjectIndexMap. virtual Error mergeDebugT(TypeMerger *m); + /// Load global hashes, either by hashing types directly, or by loading them + /// from LLVM's .debug$H section. + virtual void loadGHashes(); + + /// Use global hashes to merge type information. + virtual void remapTpiWithGHashes(GHashState *g); + + // Remap a type index in place. + bool remapTypeIndex(TypeIndex &ti, llvm::codeview::TiRefKind refKind) const; + +protected: + void remapRecord(MutableArrayRef rec, + ArrayRef typeRefs); + + void mergeTypeRecord(llvm::codeview::CVType ty); + + // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex + // of the first record in this source, typically 0x1000. When PCHs are + // involved, it may start higher. + void mergeUniqueTypeRecords( + ArrayRef debugTypes, + TypeIndex beginIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex)); + + // Use the ghash table to construct a map from source type index to + // destination PDB type index. Usable for either TPI or IPI. + void fillMapFromGHashes(GHashState *m, + llvm::SmallVectorImpl &indexMap); + + // Copies ghashes from a vector into an array. These are long lived, so it's + // worth the time to copy these into an appropriately sized vector to reduce + // memory usage. + void assignGHashesFromVector(std::vector &&hashVec); + + // Walk over file->debugTypes and fill in the isItemIndex bit vector. + void fillIsItemIndexFromDebugT(); + +public: + bool remapTypesInSymbolRecord(MutableArrayRef rec); + + void remapTypesInTypeRecord(MutableArrayRef rec); + /// Is this a dependent file that needs to be processed first, before other /// OBJs? virtual bool isDependency() const { return false; } - static void forEachSource(llvm::function_ref fn); + /// Returns true if this type record should be omitted from the PDB, even if + /// it is unique. This prevents a record from being added to the input ghash + /// table. + bool shouldOmitFromPdb(uint32_t ghashIdx) { + return ghashIdx == endPrecompGHashIdx; + } + + /// All sources of type information in the program. + static std::vector instances; + + /// Dependency type sources, such as type servers or PCH object files. These + /// must be processed before objects that rely on them. Set by + /// TpiSources::sortDependencies. + static ArrayRef dependencySources; + + /// Object file sources. These must be processed after dependencySources. + static ArrayRef objectSources; + + /// Sorts the dependencies and reassigns TpiSource indices. + static void sortDependencies(); static uint32_t countTypeServerPDBs(); static uint32_t countPrecompObjs(); + /// Free heap allocated ghashes. + static void clearGHashes(); + /// Clear global data structures for TpiSources. static void clear(); const TpiKind kind; + bool ownedGHashes = true; + uint32_t tpiSrcIdx = 0; + +protected: + /// The ghash index (zero based, not 0x1000-based) of the LF_ENDPRECOMP record + /// in this object, if one exists. This is the all ones value otherwise. It is + /// recorded here so that it can be omitted from the final ghash table. + uint32_t endPrecompGHashIdx = ~0U; + +public: ObjFile *file; + /// An error encountered during type merging, if any. + Error typeMergingError = Error::success(); + // Storage for tpiMap or ipiMap, depending on the kind of source. llvm::SmallVector indexMapStorage; @@ -76,6 +157,31 @@ class TpiSource { // objects. llvm::ArrayRef tpiMap; llvm::ArrayRef ipiMap; + + /// Array of global type hashes, indexed by TypeIndex. May be calculated on + /// demand, or present in input object files. + llvm::ArrayRef ghashes; + + /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function + /// type index here. Both indices are PDB indices, not object type indexes. + llvm::DenseMap funcIdToType; + + /// Indicates if a type record is an item index or a type index. + llvm::BitVector isItemIndex; + + /// A list of all "unique" type indices which must be merged into the final + /// PDB. GHash type deduplication produces this list, and it should be + /// considerably smaller than the input. + std::vector uniqueTypes; + + struct MergedInfo { + std::vector recs; + std::vector recSizes; + std::vector recHashes; + }; + + MergedInfo mergedTpi; + MergedInfo mergedIpi; }; TpiSource *makeTpiSource(ObjFile *file); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index fb496a1c106f2..56717de226c29 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -69,13 +69,13 @@ bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS, lld::stderrOS = &stderrOS; errorHandler().cleanupCallback = []() { + TpiSource::clear(); freeArena(); ObjFile::instances.clear(); PDBInputFile::instances.clear(); ImportFile::instances.clear(); BitcodeFile::instances.clear(); memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances)); - TpiSource::clear(); OutputSection::clear(); }; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index bfa7bd8148dfd..21a1341f78443 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -66,7 +66,8 @@ using llvm::object::coff_section; static ExitOnError exitOnErr; static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); - +Timer lld::coff::loadGHashTimer("Global Type Hashing", totalPdbLinkTimer); +Timer lld::coff::mergeGHashTimer("GHash Type Merging", totalPdbLinkTimer); static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer); static Timer typeMergingTimer("Type Merging", addObjectsTimer); static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer); @@ -112,8 +113,6 @@ class PDBLinker { /// externally. void addDebug(TpiSource *source); - bool mergeTypeRecords(TpiSource *source); - void addDebugSymbols(TpiSource *source); void mergeSymbolRecords(TpiSource *source, @@ -250,43 +249,18 @@ static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder, }); } -static bool remapTypeIndex(TypeIndex &ti, ArrayRef typeIndexMap) { - if (ti.isSimple()) - return true; - if (ti.toArrayIndex() >= typeIndexMap.size()) - return false; - ti = typeIndexMap[ti.toArrayIndex()]; - return true; -} - -static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind, - MutableArrayRef recordBytes, - TpiSource *source, - ArrayRef typeRefs) { - MutableArrayRef contents = - recordBytes.drop_front(sizeof(RecordPrefix)); - for (const TiReference &ref : typeRefs) { - unsigned byteSize = ref.Count * sizeof(TypeIndex); - if (contents.size() < ref.Offset + byteSize) - fatal("symbol record too short"); - - // This can be an item index or a type index. Choose the appropriate map. - bool isItemIndex = ref.Kind == TiRefKind::IndexRef; - ArrayRef typeOrItemMap = - isItemIndex ? source->ipiMap : source->tpiMap; - - MutableArrayRef tIs( - reinterpret_cast(contents.data() + ref.Offset), ref.Count); - for (TypeIndex &ti : tIs) { - if (!remapTypeIndex(ti, typeOrItemMap)) { - log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " + - file->getName() + " with bad " + (isItemIndex ? "item" : "type") + - " index 0x" + utohexstr(ti.getIndex())); - ti = TypeIndex(SimpleTypeKind::NotTranslated); - continue; - } - } - } +static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) { + // Start the TPI or IPI stream header. + builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80); + builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80); + for_each(TpiSource::instances, [&](TpiSource *source) { + builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs, + source->mergedTpi.recSizes, + source->mergedTpi.recHashes); + builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs, + source->mergedIpi.recSizes, + source->mergedIpi.recHashes); + }); } static void @@ -329,7 +303,7 @@ static SymbolKind symbolKind(ArrayRef recordData) { /// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32 static void translateIdSymbols(MutableArrayRef &recordData, - TypeCollection &idTable) { + TypeMerger &tMerger, TpiSource *source) { RecordPrefix *prefix = reinterpret_cast(recordData.data()); SymbolKind kind = symbolKind(recordData); @@ -356,13 +330,25 @@ static void translateIdSymbols(MutableArrayRef &recordData, reinterpret_cast(content.data() + refs[0].Offset); // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in // the IPI stream, whose `FunctionType` member refers to the TPI stream. - // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and + // Note that LF_FUNC_ID and LF_MFUNC_ID have the same record layout, and // in both cases we just need the second type index. if (!ti->isSimple() && !ti->isNoneType()) { - CVType funcIdData = idTable.getType(*ti); - ArrayRef tiBuf = funcIdData.data().slice(8, 4); - assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record"); - *ti = *reinterpret_cast(tiBuf.data()); + if (config->debugGHashes) { + auto idToType = source->funcIdToType.find(*ti); + if (idToType == source->funcIdToType.end()) { + warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item " + "index {1:X} which is not a LF_[M]FUNC_ID record", + source->file->getName(), ti->getIndex())); + *ti = TypeIndex(SimpleTypeKind::NotTranslated); + } else { + *ti = idToType->second; + } + } else { + CVType funcIdData = tMerger.getIDTable().getType(*ti); + ArrayRef tiBuf = funcIdData.data().slice(8, 4); + assert(tiBuf.size() == 4 && "corrupt LF_[M]FUNC_ID record"); + *ti = *reinterpret_cast(tiBuf.data()); + } } kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32 @@ -561,22 +547,16 @@ void PDBLinker::mergeSymbolRecords(TpiSource *source, const_cast(sym.data().data()), sym.length()); } - // Discover type index references in the record. Skip it if we don't - // know where they are. - SmallVector typeRefs; - if (!discoverTypeIndicesInSymbol(sym, typeRefs)) { - log("ignoring unknown symbol record with kind 0x" + - utohexstr(sym.kind())); + // Re-map all the type index references. + if (!source->remapTypesInSymbolRecord(recordBytes)) { + log("error remapping types in symbol of kind 0x" + + utohexstr(sym.kind()) + ", ignoring"); return Error::success(); } - // Re-map all the type index references. - remapTypesInSymbolRecord(file, sym.kind(), recordBytes, source, - typeRefs); - // An object file may have S_xxx_ID symbols, but these get converted to // "real" symbols in a PDB. - translateIdSymbols(recordBytes, tMerger.getIDTable()); + translateIdSymbols(recordBytes, tMerger, source); sym = CVSymbol(recordBytes); // If this record refers to an offset in the object file's string table, @@ -748,11 +728,15 @@ void DebugSHandler::mergeInlineeLines( const DebugSubsectionRecord &inlineeSubsection) { DebugInlineeLinesSubsectionRef inlineeLines; exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData())); + if (!source) { + warn("ignoring inlinee lines section in file that lacks type information"); + return; + } // Remap type indices in inlinee line records in place. for (const InlineeSourceLine &line : inlineeLines) { TypeIndex &inlinee = *const_cast(&line.Header->Inlinee); - if (!remapTypeIndex(inlinee, source->ipiMap)) { + if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) { log("bad inlinee line record in " + file.getName() + " with bad inlinee index 0x" + utohexstr(inlinee.getIndex())); } @@ -827,20 +811,6 @@ static void warnUnusable(InputFile *f, Error e) { warn(msg); } -bool PDBLinker::mergeTypeRecords(TpiSource *source) { - ScopedTimer t(typeMergingTimer); - // Before we can process symbol substreams from .debug$S, we need to process - // type information, file checksums, and the string table. Add type info to - // the PDB first, so that we can get the map from object file type and item - // indices to PDB type and item indices. - if (Error e = source->mergeDebugT(&tMerger)) { - // If the .debug$T sections fail to merge, assume there is no debug info. - warnUnusable(source->file, std::move(e)); - return false; - } - return true; -} - // Allocate memory for a .debug$S / .debug$F section and relocate it. static ArrayRef relocateDebugChunk(SectionChunk &debugChunk) { uint8_t *buffer = bAlloc.Allocate(debugChunk.getSize()); @@ -920,9 +890,28 @@ static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) { } void PDBLinker::addDebug(TpiSource *source) { + // Before we can process symbol substreams from .debug$S, we need to process + // type information, file checksums, and the string table. Add type info to + // the PDB first, so that we can get the map from object file type and item + // indices to PDB type and item indices. If we are using ghashes, types have + // already been merged. + if (!config->debugGHashes) { + ScopedTimer t(typeMergingTimer); + if (Error e = source->mergeDebugT(&tMerger)) { + // If type merging failed, ignore the symbols. + warnUnusable(source->file, std::move(e)); + return; + } + } + // If type merging failed, ignore the symbols. - if (mergeTypeRecords(source)) - addDebugSymbols(source); + Error typeError = std::move(source->typeMergingError); + if (typeError) { + warnUnusable(source->file, std::move(typeError)); + return; + } + + addDebugSymbols(source); } static pdb::BulkPublic createPublic(Defined *def) { @@ -955,25 +944,31 @@ void PDBLinker::addObjectsToPDB() { for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(builder, obj); }); - // Merge dependencies - TpiSource::forEachSource([&](TpiSource *source) { - if (source->isDependency()) - addDebug(source); - }); + // Reorder dependency type sources to come first. + TpiSource::sortDependencies(); - // Merge regular and dependent OBJs - TpiSource::forEachSource([&](TpiSource *source) { - if (!source->isDependency()) - addDebug(source); - }); + // Merge type information from input files using global type hashing. + if (config->debugGHashes) + tMerger.mergeTypesWithGHash(); + + // Merge dependencies and then regular objects. + for_each(TpiSource::dependencySources, + [&](TpiSource *source) { addDebug(source); }); + for_each(TpiSource::objectSources, + [&](TpiSource *source) { addDebug(source); }); builder.getStringTableBuilder().setStrings(pdbStrTab); t1.stop(); // Construct TPI and IPI stream contents. ScopedTimer t2(tpiStreamLayoutTimer); - addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); - addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + // Collect all the merged types. + if (config->debugGHashes) { + addGHashTypeInfo(builder); + } else { + addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); + addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + } t2.stop(); } @@ -1014,8 +1009,8 @@ void PDBLinker::printStats() { "Input OBJ files (expanded from all cmd-line inputs)"); print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies"); print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies"); - print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(), - "Merged TPI records"); + print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records"); + print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records"); print(pdbStrTab.size(), "Output PDB strings"); print(globalSymbols, "Global symbol records"); print(moduleSymbols, "Module symbol records"); @@ -1067,8 +1062,11 @@ void PDBLinker::printStats() { } }; - printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); - printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); + if (!config->debugGHashes) { + // FIXME: Reimplement for ghash. + printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable()); + printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable()); + } message(buffer); } diff --git a/lld/COFF/PDB.h b/lld/COFF/PDB.h index 273609ea788c5..53506d40baef4 100644 --- a/lld/COFF/PDB.h +++ b/lld/COFF/PDB.h @@ -20,6 +20,8 @@ union DebugInfo; } namespace lld { +class Timer; + namespace coff { class OutputSection; class SectionChunk; @@ -32,6 +34,10 @@ void createPDB(SymbolTable *symtab, llvm::Optional> getFileLineCodeView(const SectionChunk *c, uint32_t addr); + +extern Timer loadGHashTimer; +extern Timer mergeGHashTimer; + } // namespace coff } // namespace lld diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h index d3184a7f18d74..be877cfda6e6b 100644 --- a/lld/COFF/TypeMerger.h +++ b/lld/COFF/TypeMerger.h @@ -10,45 +10,47 @@ #define LLD_COFF_TYPEMERGER_H #include "Config.h" -#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/Support/Allocator.h" +#include namespace lld { namespace coff { +using llvm::codeview::GloballyHashedType; +using llvm::codeview::TypeIndex; + +struct GHashState; + class TypeMerger { public: - TypeMerger(llvm::BumpPtrAllocator &alloc) - : typeTable(alloc), idTable(alloc), globalTypeTable(alloc), - globalIDTable(alloc) {} + TypeMerger(llvm::BumpPtrAllocator &alloc); + + ~TypeMerger(); /// Get the type table or the global type table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getTypeTable() { - if (config->debugGHashes) - return globalTypeTable; + assert(!config->debugGHashes); return typeTable; } /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled. inline llvm::codeview::TypeCollection &getIDTable() { - if (config->debugGHashes) - return globalIDTable; + assert(!config->debugGHashes); return idTable; } + /// Use global hashes to eliminate duplicate types and identify unique type + /// indices in each TpiSource. + void mergeTypesWithGHash(); + /// Type records that will go into the PDB TPI stream. llvm::codeview::MergingTypeTableBuilder typeTable; /// Item records that will go into the PDB IPI stream. llvm::codeview::MergingTypeTableBuilder idTable; - /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH) - llvm::codeview::GlobalTypeTableBuilder globalTypeTable; - - /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH) - llvm::codeview::GlobalTypeTableBuilder globalIDTable; - // When showSummary is enabled, these are histograms of TPI and IPI records // keyed by type index. SmallVector tpiCounts; diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index 4ffc564e67e2f..79a5940823bdf 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -153,6 +153,13 @@ template T check(Expected e) { return std::move(*e); } +// Don't move from Expected wrappers around references. +template T &check(Expected e) { + if (!e) + fatal(llvm::toString(e.takeError())); + return *e; +} + template T check2(ErrorOr e, llvm::function_ref prefix) { if (auto ec = e.getError()) diff --git a/lld/test/COFF/pdb-global-hashes.test b/lld/test/COFF/pdb-global-hashes.test index 13039d42fe26a..430275b7a8848 100644 --- a/lld/test/COFF/pdb-global-hashes.test +++ b/lld/test/COFF/pdb-global-hashes.test @@ -2,7 +2,7 @@ RUN: yaml2obj %p/Inputs/pdb-hashes-1.yaml -o %t.1.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2.yaml -o %t.2.obj RUN: yaml2obj %p/Inputs/pdb-hashes-2-missing.yaml -o %t.2.missing.obj RUN: lld-link /debug %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.nohash.pdb -RUN: lld-link /debug:ghash %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb +RUN: lld-link /debug:ghash -verbose %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb RUN: lld-link /debug:ghash %t.1.obj %t.2.missing.obj /entry:main /nodefaultlib /PDB:%t.mixed.pdb RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.nohash.pdb | FileCheck %s RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.hash.pdb | FileCheck %s diff --git a/lld/test/COFF/pdb-procid-remapping.test b/lld/test/COFF/pdb-procid-remapping.test index d7ea775be98e7..adc93585f2aac 100644 --- a/lld/test/COFF/pdb-procid-remapping.test +++ b/lld/test/COFF/pdb-procid-remapping.test @@ -1,8 +1,12 @@ -# RUN: yaml2obj %p/Inputs/pdb1.yaml -o %t1.obj -# RUN: yaml2obj %p/Inputs/pdb2.yaml -o %t2.obj +# RUN: yaml2obj < %p/Inputs/pdb1.yaml > %t1.obj +# RUN: yaml2obj < %p/Inputs/pdb2.yaml > %t2.obj + # RUN: lld-link /debug /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ # RUN: %t1.obj %t2.obj +# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s +# RUN: lld-link /debug /debug:ghash /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \ +# RUN: %t1.obj %t2.obj # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s CHECK: Symbols diff --git a/lld/test/COFF/pdb-type-server-missing.yaml b/lld/test/COFF/pdb-type-server-missing.yaml index 1a8c9a05c3d9c..78ddc0e4adb28 100644 --- a/lld/test/COFF/pdb-type-server-missing.yaml +++ b/lld/test/COFF/pdb-type-server-missing.yaml @@ -5,6 +5,7 @@ # RUN: yaml2obj %s -o %t1.obj # RUN: yaml2obj %p/Inputs/pdb-type-server-missing-2.yaml -o %t2.obj # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN +# RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug:ghash -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 2>&1 | FileCheck %s -check-prefix=IGNORE -allow-empty # RUN: not lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /WX 2>&1 | FileCheck %s -check-prefix=ERR # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 /WX 2>&1 | FileCheck %s -check-prefix=IGNORE-ERR -allow-empty diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test index bcba6da28b690..b954712d9b6c3 100644 --- a/lld/test/COFF/pdb-type-server-simple.test +++ b/lld/test/COFF/pdb-type-server-simple.test @@ -20,7 +20,11 @@ RUN: rm -rf %t && mkdir -p %t && cd %t RUN: yaml2obj %S/Inputs/pdb-type-server-simple-a.yaml -o a.obj RUN: yaml2obj %S/Inputs/pdb-type-server-simple-b.yaml -o b.obj RUN: llvm-pdbutil yaml2pdb %S/Inputs/pdb-type-server-simple-ts.yaml -pdb ts.pdb -RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib /summary | FileCheck %s -check-prefix SUMMARY +RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib -summary | FileCheck %s -check-prefix SUMMARY +RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s + +Re-run with /DEBUG:GHASH +RUN: lld-link a.obj b.obj -entry:main -debug:ghash -out:t.exe -pdb:t.pdb -nodefaultlib -summary -verbose RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s @@ -101,7 +105,8 @@ SUMMARY-NEXT: ------------------------------------------------------------------ SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 1 PDB type server dependencies SUMMARY-NEXT: 0 Precomp OBJ dependencies -SUMMARY-NEXT: 25 Merged TPI records +SUMMARY-NEXT: 9 Merged TPI records +SUMMARY-NEXT: 16 Merged IPI records SUMMARY-NEXT: 3 Output PDB strings SUMMARY-NEXT: 4 Global symbol records SUMMARY-NEXT: 14 Module symbol records diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test index b0692ee8002f7..161ee88d27f5e 100644 --- a/lld/test/COFF/precomp-link.test +++ b/lld/test/COFF/precomp-link.test @@ -5,6 +5,7 @@ RUN: lld-link %S/Inputs/precomp.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b. RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE +RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE FIXME: The following RUN line should fail, regardless of whether debug info is enabled or not. Normally this would result in an error due to missing _PchSym_ @@ -52,12 +53,19 @@ CHECK-NOT: LF_PRECOMP CHECK-NOT: LF_ENDPRECOMP +Re-run with ghash. Eventually, perhaps this will be the default. + +RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf /summary | FileCheck %s -check-prefix SUMMARY +RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s + + SUMMARY: Summary SUMMARY-NEXT: -------------------------------------------------------------------------------- SUMMARY-NEXT: 3 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 0 PDB type server dependencies SUMMARY-NEXT: 1 Precomp OBJ dependencies -SUMMARY-NEXT: 1044 Merged TPI records +SUMMARY-NEXT: 874 Merged TPI records +SUMMARY-NEXT: 170 Merged IPI records SUMMARY-NEXT: 5 Output PDB strings SUMMARY-NEXT: 167 Global symbol records SUMMARY-NEXT: 20 Module symbol records diff --git a/lld/test/COFF/s_udt.s b/lld/test/COFF/s_udt.s index 63e4099709575..373394334b19c 100644 --- a/lld/test/COFF/s_udt.s +++ b/lld/test/COFF/s_udt.s @@ -2,6 +2,8 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-windows-msvc < %s > %t.obj # RUN: lld-link /DEBUG:FULL /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe # RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s +# RUN: lld-link /DEBUG:FULL /debug:ghash /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe +# RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s # CHECK: Types (TPI Stream) # CHECK-NEXT: ============================================================ diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h index b0a16cccbff31..e6ade770457c2 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -86,6 +86,16 @@ struct GloballyHashedType { bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + friend inline bool operator==(const GloballyHashedType &L, + const GloballyHashedType &R) { + return L.Hash == R.Hash; + } + + friend inline bool operator!=(const GloballyHashedType &L, + const GloballyHashedType &R) { + return !(L.Hash == R.Hash); + } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -206,7 +216,7 @@ template <> struct DenseMapInfo { static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS) { - return LHS.Hash == RHS.Hash; + return LHS == RHS; } }; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index b9e2562bfc2b1..bdc6cf46509bc 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -116,13 +116,22 @@ class TypeIndex { uint32_t toArrayIndex() const { assert(!isSimple()); - return getIndex() - FirstNonSimpleIndex; + return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex; } static TypeIndex fromArrayIndex(uint32_t Index) { return TypeIndex(Index + FirstNonSimpleIndex); } + static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) { + return TypeIndex((Index + FirstNonSimpleIndex) | + (IsItem ? DecoratedItemIdMask : 0)); + } + + TypeIndex removeDecoration() { + return TypeIndex(Index & ~DecoratedItemIdMask); + } + SimpleTypeKind getSimpleKind() const { assert(isSimple()); return static_cast(Index & SimpleKindMask); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index 72d98e9c2c4d1..9ef2ee6a93070 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -54,16 +54,20 @@ class TpiStreamBuilder { void setVersionHeader(PdbRaw_TpiVer Version); void addTypeRecord(ArrayRef Type, Optional Hash); + void addTypeRecords(ArrayRef Types, ArrayRef Sizes, + ArrayRef Hashes); Error finalizeMsfLayout(); - uint32_t getRecordCount() const { return TypeRecords.size(); } + uint32_t getRecordCount() const { return TypeRecordCount; } Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); private: + void updateTypeIndexOffsets(ArrayRef Sizes); + uint32_t calculateHashBufferSize() const; uint32_t calculateIndexOffsetSize() const; Error finalize(); @@ -71,10 +75,11 @@ class TpiStreamBuilder { msf::MSFBuilder &Msf; BumpPtrAllocator &Allocator; + uint32_t TypeRecordCount = 0; size_t TypeRecordBytes = 0; PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; - std::vector> TypeRecords; + std::vector> TypeRecBuffers; std::vector TypeHashes; std::vector TypeIndexOffsets; uint32_t HashStreamIndex = kInvalidStreamIndex; diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp index 47b5498181b7f..1ca899789bef2 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h" @@ -77,9 +78,10 @@ Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { uint32_t Size = Indices.size(); Name = "("; for (uint32_t I = 0; I < Size; ++I) { - assert(Indices[I] < CurrentTypeIndex); - - Name.append(Types.getTypeName(Indices[I])); + if (Indices[I] < CurrentTypeIndex) + Name.append(Types.getTypeName(Indices[I])); + else + Name.append(""); if (I + 1 != Size) Name.append(", "); } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 51a1f0a544e3c..b5e7b03e6917f 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Error.h" #include #include +#include using namespace llvm; using namespace llvm::msf; @@ -41,39 +42,68 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { VerHeader = Version; } +void TpiStreamBuilder::updateTypeIndexOffsets(ArrayRef Sizes) { + // If we just crossed an 8KB threshold, add a type index offset. + for (uint16_t Size : Sizes) { + size_t NewSize = TypeRecordBytes + Size; + constexpr size_t EightKB = 8 * 1024; + if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecordCount == 0) { + TypeIndexOffsets.push_back( + {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + + TypeRecordCount), + ulittle32_t(TypeRecordBytes)}); + } + ++TypeRecordCount; + TypeRecordBytes = NewSize; + } +} + void TpiStreamBuilder::addTypeRecord(ArrayRef Record, Optional Hash) { - // If we just crossed an 8KB threshold, add a type index offset. assert(((Record.size() & 3) == 0) && "The type record's size is not a multiple of 4 bytes which will " "cause misalignment in the output TPI stream!"); - size_t NewSize = TypeRecordBytes + Record.size(); - constexpr size_t EightKB = 8 * 1024; - if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { - TypeIndexOffsets.push_back( - {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex + - TypeRecords.size()), - ulittle32_t(TypeRecordBytes)}); - } - TypeRecordBytes = NewSize; + assert(Record.size() <= codeview::MaxRecordLength); + uint16_t OneSize = (uint16_t)Record.size(); + updateTypeIndexOffsets(makeArrayRef(&OneSize, 1)); - TypeRecords.push_back(Record); + TypeRecBuffers.push_back(Record); + // FIXME: Require it. if (Hash) TypeHashes.push_back(*Hash); } +void TpiStreamBuilder::addTypeRecords(ArrayRef Types, + ArrayRef Sizes, + ArrayRef Hashes) { + // Ignore empty type buffers. There should be no hashes or sizes in this case. + if (Types.empty()) { + assert(Sizes.empty() && Hashes.empty()); + return; + } + + assert(((Types.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + assert(Sizes.size() == Hashes.size() && "sizes and hashes should be in sync"); + assert(std::accumulate(Sizes.begin(), Sizes.end(), 0U) == Types.size() && + "sizes of type records should sum to the size of the types"); + updateTypeIndexOffsets(Sizes); + + TypeRecBuffers.push_back(Types); + TypeHashes.insert(TypeHashes.end(), Hashes.begin(), Hashes.end()); +} + Error TpiStreamBuilder::finalize() { if (Header) return Error::success(); TpiStreamHeader *H = Allocator.Allocate(); - uint32_t Count = TypeRecords.size(); - H->Version = VerHeader; H->HeaderSize = sizeof(TpiStreamHeader); H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; - H->TypeIndexEnd = H->TypeIndexBegin + Count; + H->TypeIndexEnd = H->TypeIndexBegin + TypeRecordCount; H->TypeRecordBytes = TypeRecordBytes; H->HashStreamIndex = HashStreamIndex; @@ -104,7 +134,7 @@ uint32_t TpiStreamBuilder::calculateSerializedLength() { } uint32_t TpiStreamBuilder::calculateHashBufferSize() const { - assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) && + assert((TypeRecordCount == TypeHashes.size() || TypeHashes.empty()) && "either all or no type records should have hashes"); return TypeHashes.size() * sizeof(ulittle32_t); } @@ -155,7 +185,7 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecords) { + for (auto Rec : TypeRecBuffers) { assert(!Rec.empty() && "Attempting to write an empty type record shifts " "all offsets in the TPI stream!"); assert(((Rec.size() & 3) == 0) && From 37b2e2b04cf434b368b1edf29609be21952316f9 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 13:34:23 -0700 Subject: [PATCH 214/544] [flang] Semantic analysis for FINAL subroutines Represent FINAL subroutines in the symbol table entries of derived types. Enforce constraints. Update tests that have inadvertent violations or modified messages. Added a test. The specific procedure distinguishability checking code for generics was used to enforce distinguishability of FINAL procedures. (Also cleaned up some confusion and redundancy noticed in the type compatibility infrastructure while digging into that area.) Differential revision: https://reviews.llvm.org/D88613 --- .../include/flang/Evaluate/characteristics.h | 2 +- flang/include/flang/Evaluate/type.h | 6 +- flang/include/flang/Semantics/symbol.h | 7 +- flang/include/flang/Semantics/tools.h | 15 ++ flang/lib/Evaluate/characteristics.cpp | 2 +- flang/lib/Evaluate/tools.cpp | 1 - flang/lib/Evaluate/type.cpp | 90 ++-------- flang/lib/Semantics/check-call.cpp | 27 +-- flang/lib/Semantics/check-declarations.cpp | 167 ++++++++++++++++-- flang/lib/Semantics/mod-file.cpp | 20 ++- flang/lib/Semantics/mod-file.h | 3 +- flang/lib/Semantics/pointer-assignment.cpp | 2 +- flang/lib/Semantics/resolve-names.cpp | 18 +- flang/lib/Semantics/symbol.cpp | 2 - flang/lib/Semantics/tools.cpp | 64 +++++-- flang/test/Semantics/call03.f90 | 4 +- flang/test/Semantics/call05.f90 | 4 +- flang/test/Semantics/final01.f90 | 119 +++++++++++++ flang/test/Semantics/modfile10.f90 | 2 +- flang/test/Semantics/resolve32.f90 | 2 +- flang/test/Semantics/resolve55.f90 | 19 +- 21 files changed, 427 insertions(+), 149 deletions(-) create mode 100644 flang/test/Semantics/final01.f90 diff --git a/flang/include/flang/Evaluate/characteristics.h b/flang/include/flang/Evaluate/characteristics.h index fe7cc2dac0ca5..bde734cd510dc 100644 --- a/flang/include/flang/Evaluate/characteristics.h +++ b/flang/include/flang/Evaluate/characteristics.h @@ -45,7 +45,7 @@ namespace Fortran::evaluate::characteristics { using common::CopyableIndirection; -// Are these procedures distinguishable for a generic name? +// Are these procedures distinguishable for a generic name or FINAL? bool Distinguishable(const Procedure &, const Procedure &); // Are these procedures distinguishable for a generic operator or assignment? bool DistinguishableOpOrAssign(const Procedure &, const Procedure &); diff --git a/flang/include/flang/Evaluate/type.h b/flang/include/flang/Evaluate/type.h index 663ece6eb4a09..183cb6de2781b 100644 --- a/flang/include/flang/Evaluate/type.h +++ b/flang/include/flang/Evaluate/type.h @@ -166,11 +166,9 @@ class DynamicType { bool HasDeferredTypeParameter() const; // 7.3.2.3 & 15.5.2.4 type compatibility. - // x.IsTypeCompatibleWith(y) is true if "x => y" or passing actual y to + // x.IsTkCompatibleWith(y) is true if "x => y" or passing actual y to // dummy argument x would be valid. Be advised, this is not a reflexive - // relation. - bool IsTypeCompatibleWith(const DynamicType &) const; - // Type compatible and kind type parameters match + // relation. Kind type parameters must match. bool IsTkCompatibleWith(const DynamicType &) const; // Result will be missing when a symbol is absent or diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 5f861d10332ed..ca6ab22c14ca2 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -248,6 +248,8 @@ class DerivedTypeDetails { const std::list ¶mNames() const { return paramNames_; } const SymbolVector ¶mDecls() const { return paramDecls_; } bool sequence() const { return sequence_; } + std::map &finals() { return finals_; } + const std::map &finals() const { return finals_; } bool isForwardReferenced() const { return isForwardReferenced_; } void add_paramName(const SourceName &name) { paramNames_.push_back(name); } void add_paramDecl(const Symbol &symbol) { paramDecls_.push_back(symbol); } @@ -279,6 +281,7 @@ class DerivedTypeDetails { // These are the names of the derived type's components in component // order. A parent component, if any, appears first in this list. std::list componentNames_; + std::map finals_; // FINAL :: subr bool sequence_{false}; bool isForwardReferenced_{false}; friend llvm::raw_ostream &operator<<( @@ -322,8 +325,6 @@ class CommonBlockDetails { std::size_t alignment_{0}; // required alignment in bytes }; -class FinalProcDetails {}; // TODO - class MiscDetails { public: ENUM_CLASS(Kind, None, ConstructName, ScopeName, PassName, ComplexPartRe, @@ -471,7 +472,7 @@ using Details = std::variant; + TypeParamDetails, MiscDetails>; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Details &); std::string DetailsToString(const Details &); diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 58ba7bf700175..6e1e06b3ec761 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -162,6 +162,7 @@ inline bool IsAssumedRankArray(const Symbol &symbol) { } bool IsAssumedLengthCharacter(const Symbol &); bool IsExternal(const Symbol &); +bool IsModuleProcedure(const Symbol &); // Is the symbol modifiable in this scope std::optional WhyNotModifiable( const Symbol &, const Scope &); @@ -283,6 +284,20 @@ template bool IsZero(const T &expr) { return value && *value == 0; } +// 15.2.2 +enum class ProcedureDefinitionClass { + None, + Intrinsic, + External, + Internal, + Module, + Dummy, + Pointer, + StatementFunction +}; + +ProcedureDefinitionClass ClassifyProcedure(const Symbol &); + // Derived type component iterator that provides a C++ LegacyForwardIterator // iterator over the Ordered, Direct, Ultimate or Potential components of a // DerivedTypeSpec. These iterators can be used with STL algorithms diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index de013367f6aa1..a28f4dd004cc1 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -130,7 +130,7 @@ bool TypeAndShape::IsCompatibleWith(parser::ContextualMessages &messages, const TypeAndShape &that, const char *thisIs, const char *thatIs, bool isElemental) const { const auto &len{that.LEN()}; - if (!type_.IsTypeCompatibleWith(that.type_)) { + if (!type_.IsTkCompatibleWith(that.type_)) { messages.Say( "%1$s type '%2$s' is not compatible with %3$s type '%4$s'"_err_en_US, thatIs, that.type_.AsFortran(len ? len->AsFortran() : ""), thisIs, diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 567a3768b103f..b560cce1192d4 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -965,7 +965,6 @@ bool IsProcedure(const Symbol &symbol) { [](const GenericDetails &) { return true; }, [](const ProcBindingDetails &) { return true; }, [](const UseDetails &x) { return IsProcedure(x.symbol()); }, - // TODO: FinalProcDetails? [](const auto &) { return false; }, }, symbol.details()); diff --git a/flang/lib/Evaluate/type.cpp b/flang/lib/Evaluate/type.cpp index e96e19150f4ee..e370f2b05b954 100644 --- a/flang/lib/Evaluate/type.cpp +++ b/flang/lib/Evaluate/type.cpp @@ -218,19 +218,6 @@ const semantics::DerivedTypeSpec *GetParentTypeSpec( } } -static const semantics::Symbol *FindComponent( - const semantics::DerivedTypeSpec &derived, parser::CharBlock name) { - if (const auto *scope{derived.scope()}) { - auto iter{scope->find(name)}; - if (iter != scope->end()) { - return &*iter->second; - } else if (const auto *parent{GetParentTypeSpec(derived)}) { - return FindComponent(*parent, name); - } - } - return nullptr; -} - // Compares two derived type representations to see whether they both // represent the "same type" in the sense of section 7.5.2.4. using SetOfDerivedTypePairs = @@ -294,24 +281,9 @@ static bool AreSameComponent(const semantics::Symbol &x, if (x.attrs().test(semantics::Attr::PRIVATE)) { return false; } -#if 0 // TODO - if (const auto *xObject{x.detailsIf()}) { - if (const auto *yObject{y.detailsIf()}) { -#else - if (x.has()) { - if (y.has()) { -#endif - // TODO: compare types, type parameters, bounds, &c. - return true; -} -else { - return false; -} -} // namespace Fortran::evaluate -else { - // TODO: non-object components - return true; -} + // TODO: compare types, parameters, bounds, &c. + return x.has() == + y.has(); } static bool AreCompatibleDerivedTypes(const semantics::DerivedTypeSpec *x, @@ -334,45 +306,9 @@ bool IsKindTypeParameter(const semantics::Symbol &symbol) { return param && param->attr() == common::TypeParamAttr::Kind; } -static bool IsKindTypeParameter( - const semantics::DerivedTypeSpec &derived, parser::CharBlock name) { - const semantics::Symbol *symbol{FindComponent(derived, name)}; - return symbol && IsKindTypeParameter(*symbol); -} - -bool DynamicType::IsTypeCompatibleWith(const DynamicType &that) const { - if (derived_) { - if (!AreCompatibleDerivedTypes(derived_, that.derived_, IsPolymorphic())) { - return false; - } - // The values of derived type KIND parameters must match. - for (const auto &[name, param] : derived_->parameters()) { - if (IsKindTypeParameter(*derived_, name)) { - bool ok{false}; - if (auto myValue{ToInt64(param.GetExplicit())}) { - if (const auto *thatParam{that.derived_->FindParameter(name)}) { - if (auto thatValue{ToInt64(thatParam->GetExplicit())}) { - ok = *myValue == *thatValue; - } - } - } - if (!ok) { - return false; - } - } - } - return true; - } else if (category_ == that.category_ && kind_ == that.kind_) { - // CHARACTER length is not checked here - return true; - } else { - return IsUnlimitedPolymorphic(); - } -} - // Do the kind type parameters of type1 have the same values as the -// corresponding kind type parameters of the type2? -static bool IsKindCompatible(const semantics::DerivedTypeSpec &type1, +// corresponding kind type parameters of type2? +static bool AreKindCompatible(const semantics::DerivedTypeSpec &type1, const semantics::DerivedTypeSpec &type2) { for (const auto &[name, param1] : type1.parameters()) { if (param1.isKind()) { @@ -385,18 +321,20 @@ static bool IsKindCompatible(const semantics::DerivedTypeSpec &type1, return true; } +// See 7.3.2.3 (5) & 15.5.2.4 bool DynamicType::IsTkCompatibleWith(const DynamicType &that) const { - if (category_ != TypeCategory::Derived) { - return category_ == that.category_ && kind_ == that.kind_; - } else if (IsUnlimitedPolymorphic()) { + if (IsUnlimitedPolymorphic()) { return true; } else if (that.IsUnlimitedPolymorphic()) { return false; - } else if (!derived_ || !that.derived_ || - !IsKindCompatible(*derived_, *that.derived_)) { - return false; // kind params don't match + } else if (category_ != that.category_) { + return false; + } else if (derived_) { + return that.derived_ && + AreCompatibleDerivedTypes(derived_, that.derived_, IsPolymorphic()) && + AreKindCompatible(*derived_, *that.derived_); } else { - return AreCompatibleDerivedTypes(derived_, that.derived_, IsPolymorphic()); + return kind_ == that.kind_; } } diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 8c3810cd9daa8..7e1d57cf579e5 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -144,8 +144,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, parser::ContextualMessages &messages{context.messages()}; PadShortCharacterActual(actual, dummy.type, actualType, messages); ConvertIntegerActual(actual, dummy.type, actualType, messages); - bool typesCompatible{ - dummy.type.type().IsTypeCompatibleWith(actualType.type())}; + bool typesCompatible{dummy.type.type().IsTkCompatibleWith(actualType.type())}; if (typesCompatible) { if (isElemental) { } else if (dummy.type.attrs().test( @@ -215,13 +214,17 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, "Actual argument associated with TYPE(*) %s may not have type-bound procedure '%s'"_err_en_US, dummyName, tbp->name()); } - if (const Symbol * - finalizer{FindImmediateComponent(*derived, [](const Symbol &symbol) { - return symbol.has(); - })}) { // 15.5.2.4(2) - evaluate::SayWithDeclaration(messages, *finalizer, - "Actual argument associated with TYPE(*) %s may not have FINAL subroutine '%s'"_err_en_US, - dummyName, finalizer->name()); + const auto &finals{ + derived->typeSymbol().get().finals()}; + if (!finals.empty()) { // 15.5.2.4(2) + if (auto *msg{messages.Say( + "Actual argument associated with TYPE(*) %s may not have derived type '%s' with FINAL subroutine '%s'"_err_en_US, + dummyName, derived->typeSymbol().name(), + finals.begin()->first)}) { + msg->Attach(finals.begin()->first, + "FINAL subroutine '%s' in derived type '%s'"_en_US, + finals.begin()->first, derived->typeSymbol().name()); + } } } if (actualIsCoindexed) { @@ -431,14 +434,14 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, "If a POINTER or ALLOCATABLE dummy or actual argument is polymorphic, both must be so"_err_en_US); } } else if (!actualIsUnlimited && typesCompatible) { - if (!actualType.type().IsTypeCompatibleWith(dummy.type.type())) { + if (!actualType.type().IsTkCompatibleWith(dummy.type.type())) { if (dummy.intent == common::Intent::In) { // extension: allow with warning, rule is only relevant for definables messages.Say( - "POINTER or ALLOCATABLE dummy and actual arguments should have the same declared type"_en_US); + "POINTER or ALLOCATABLE dummy and actual arguments should have the same declared type and kind"_en_US); } else { messages.Say( - "POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type"_err_en_US); + "POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type and kind"_err_en_US); } } if (const auto *derived{ diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 896af3cc83e08..dee26ab592270 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -66,6 +66,10 @@ class CheckHelper { void CheckSubprogram(const Symbol &, const SubprogramDetails &); void CheckAssumedTypeEntity(const Symbol &, const ObjectEntityDetails &); void CheckDerivedType(const Symbol &, const DerivedTypeDetails &); + bool CheckFinal( + const Symbol &subroutine, SourceName, const Symbol &derivedType); + bool CheckDistinguishableFinals(const Symbol &f1, SourceName f1name, + const Symbol &f2, SourceName f2name, const Symbol &derivedType); void CheckGeneric(const Symbol &, const GenericDetails &); void CheckHostAssoc(const Symbol &, const HostAssocDetails &); bool CheckDefinedOperator( @@ -781,24 +785,24 @@ void CheckHelper::CheckSubprogram( } void CheckHelper::CheckDerivedType( - const Symbol &symbol, const DerivedTypeDetails &details) { - const Scope *scope{symbol.scope()}; + const Symbol &derivedType, const DerivedTypeDetails &details) { + const Scope *scope{derivedType.scope()}; if (!scope) { CHECK(details.isForwardReferenced()); return; } - CHECK(scope->symbol() == &symbol); + CHECK(scope->symbol() == &derivedType); CHECK(scope->IsDerivedType()); - if (symbol.attrs().test(Attr::ABSTRACT) && // C734 - (symbol.attrs().test(Attr::BIND_C) || details.sequence())) { + if (derivedType.attrs().test(Attr::ABSTRACT) && // C734 + (derivedType.attrs().test(Attr::BIND_C) || details.sequence())) { messages_.Say("An ABSTRACT derived type must be extensible"_err_en_US); } - if (const DeclTypeSpec * parent{FindParentTypeSpec(symbol)}) { + if (const DeclTypeSpec * parent{FindParentTypeSpec(derivedType)}) { const DerivedTypeSpec *parentDerived{parent->AsDerived()}; if (!IsExtensibleType(parentDerived)) { // C705 messages_.Say("The parent type is not extensible"_err_en_US); } - if (!symbol.attrs().test(Attr::ABSTRACT) && parentDerived && + if (!derivedType.attrs().test(Attr::ABSTRACT) && parentDerived && parentDerived->typeSymbol().attrs().test(Attr::ABSTRACT)) { ScopeComponentIterator components{*parentDerived}; for (const Symbol &component : components) { @@ -811,7 +815,7 @@ void CheckHelper::CheckDerivedType( } } } - DerivedTypeSpec derived{symbol.name(), symbol}; + DerivedTypeSpec derived{derivedType.name(), derivedType}; derived.set_scope(*scope); if (FindCoarrayUltimateComponent(derived) && // C736 !(parentDerived && FindCoarrayUltimateComponent(*parentDerived))) { @@ -819,7 +823,7 @@ void CheckHelper::CheckDerivedType( "Type '%s' has a coarray ultimate component so the type at the base " "of its type extension chain ('%s') must be a type that has a " "coarray ultimate component"_err_en_US, - symbol.name(), scope->GetDerivedTypeBase().GetSymbol()->name()); + derivedType.name(), scope->GetDerivedTypeBase().GetSymbol()->name()); } if (FindEventOrLockPotentialComponent(derived) && // C737 !(FindEventOrLockPotentialComponent(*parentDerived) || @@ -829,13 +833,154 @@ void CheckHelper::CheckDerivedType( "at the base of its type extension chain ('%s') must either have an " "EVENT_TYPE or LOCK_TYPE component, or be EVENT_TYPE or " "LOCK_TYPE"_err_en_US, - symbol.name(), scope->GetDerivedTypeBase().GetSymbol()->name()); + derivedType.name(), scope->GetDerivedTypeBase().GetSymbol()->name()); } } - if (HasIntrinsicTypeName(symbol)) { // C729 + if (HasIntrinsicTypeName(derivedType)) { // C729 messages_.Say("A derived type name cannot be the name of an intrinsic" " type"_err_en_US); } + std::map previous; + for (const auto &pair : details.finals()) { + SourceName source{pair.first}; + const Symbol &ref{*pair.second}; + if (CheckFinal(ref, source, derivedType) && + std::all_of(previous.begin(), previous.end(), + [&](std::pair prev) { + return CheckDistinguishableFinals( + ref, source, *prev.second, prev.first, derivedType); + })) { + previous.emplace(source, ref); + } + } +} + +// C786 +bool CheckHelper::CheckFinal( + const Symbol &subroutine, SourceName finalName, const Symbol &derivedType) { + if (!IsModuleProcedure(subroutine)) { + SayWithDeclaration(subroutine, finalName, + "FINAL subroutine '%s' of derived type '%s' must be a module procedure"_err_en_US, + subroutine.name(), derivedType.name()); + return false; + } + const Procedure *proc{Characterize(subroutine)}; + if (!proc) { + return false; // error recovery + } + if (!proc->IsSubroutine()) { + SayWithDeclaration(subroutine, finalName, + "FINAL subroutine '%s' of derived type '%s' must be a subroutine"_err_en_US, + subroutine.name(), derivedType.name()); + return false; + } + if (proc->dummyArguments.size() != 1) { + SayWithDeclaration(subroutine, finalName, + "FINAL subroutine '%s' of derived type '%s' must have a single dummy argument"_err_en_US, + subroutine.name(), derivedType.name()); + return false; + } + const auto &arg{proc->dummyArguments[0]}; + const Symbol *errSym{&subroutine}; + if (const auto *details{subroutine.detailsIf()}) { + if (!details->dummyArgs().empty()) { + if (const Symbol * argSym{details->dummyArgs()[0]}) { + errSym = argSym; + } + } + } + const auto *ddo{std::get_if(&arg.u)}; + if (!ddo) { + SayWithDeclaration(subroutine, finalName, + "FINAL subroutine '%s' of derived type '%s' must have a single dummy argument that is a data object"_err_en_US, + subroutine.name(), derivedType.name()); + return false; + } + bool ok{true}; + if (arg.IsOptional()) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have an OPTIONAL dummy argument"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } + if (ddo->attrs.test(DummyDataObject::Attr::Allocatable)) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have an ALLOCATABLE dummy argument"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } + if (ddo->attrs.test(DummyDataObject::Attr::Pointer)) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have a POINTER dummy argument"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } + if (ddo->intent == common::Intent::Out) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have a dummy argument with INTENT(OUT)"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } + if (ddo->attrs.test(DummyDataObject::Attr::Value)) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have a dummy argument with the VALUE attribute"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } + if (ddo->type.corank() > 0) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have a coarray dummy argument"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } + if (ddo->type.type().IsPolymorphic()) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must not have a polymorphic dummy argument"_err_en_US, + subroutine.name(), derivedType.name()); + ok = false; + } else if (ddo->type.type().category() != TypeCategory::Derived || + &ddo->type.type().GetDerivedTypeSpec().typeSymbol() != &derivedType) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must have a TYPE(%s) dummy argument"_err_en_US, + subroutine.name(), derivedType.name(), derivedType.name()); + ok = false; + } else { // check that all LEN type parameters are assumed + for (auto ref : OrderParameterDeclarations(derivedType)) { + if (const auto *paramDetails{ref->detailsIf()}) { + if (paramDetails->attr() == common::TypeParamAttr::Len) { + const auto *value{ + ddo->type.type().GetDerivedTypeSpec().FindParameter(ref->name())}; + if (!value || !value->isAssumed()) { + SayWithDeclaration(*errSym, finalName, + "FINAL subroutine '%s' of derived type '%s' must have a dummy argument with an assumed LEN type parameter '%s=*'"_err_en_US, + subroutine.name(), derivedType.name(), ref->name()); + ok = false; + } + } + } + } + } + return ok; +} + +bool CheckHelper::CheckDistinguishableFinals(const Symbol &f1, + SourceName f1Name, const Symbol &f2, SourceName f2Name, + const Symbol &derivedType) { + const Procedure *p1{Characterize(f1)}; + const Procedure *p2{Characterize(f2)}; + if (p1 && p2) { + if (characteristics::Distinguishable(*p1, *p2)) { + return true; + } + if (auto *msg{messages_.Say(f1Name, + "FINAL subroutines '%s' and '%s' of derived type '%s' cannot be distinguished by rank or KIND type parameter value"_err_en_US, + f1Name, f2Name, derivedType.name())}) { + msg->Attach(f2Name, "FINAL declaration of '%s'"_en_US, f2.name()) + .Attach(f1.name(), "Definition of '%s'"_en_US, f1Name) + .Attach(f2.name(), "Definition of '%s'"_en_US, f2Name); + } + } + return false; } void CheckHelper::CheckHostAssoc( diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index ef62a94b1b89e..f714a3b1f9bfd 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -177,7 +177,7 @@ std::string ModFileWriter::GetAsString(const Symbol &symbol) { } // Put out the visible symbols from scope. -void ModFileWriter::PutSymbols(const Scope &scope) { +bool ModFileWriter::PutSymbols(const Scope &scope) { std::string buf; llvm::raw_string_ostream typeBindings{ buf}; // stuff after CONTAINS in derived type @@ -187,6 +187,9 @@ void ModFileWriter::PutSymbols(const Scope &scope) { if (auto str{typeBindings.str()}; !str.empty()) { CHECK(scope.IsDerivedType()); decls_ << "contains\n" << str; + return true; + } else { + return false; } } @@ -257,9 +260,6 @@ void ModFileWriter::PutSymbol( decls_ << "::/" << symbol.name() << "/\n"; } }, - [&](const FinalProcDetails &) { - typeBindings << "final::" << symbol.name() << '\n'; - }, [](const HostAssocDetails &) {}, [](const MiscDetails &) {}, [&](const auto &) { PutEntity(decls_, symbol); }, @@ -287,7 +287,17 @@ void ModFileWriter::PutDerivedType(const Symbol &typeSymbol) { if (details.sequence()) { decls_ << "sequence\n"; } - PutSymbols(typeScope); + bool contains{PutSymbols(typeScope)}; + if (!details.finals().empty()) { + const char *sep{contains ? "final::" : "contains\nfinal::"}; + for (const auto &pair : details.finals()) { + decls_ << sep << pair.second->name(); + sep = ","; + } + if (*sep == ',') { + decls_ << '\n'; + } + } decls_ << "end type\n"; } diff --git a/flang/lib/Semantics/mod-file.h b/flang/lib/Semantics/mod-file.h index 17ffe804c5be3..08bf2e864ffa1 100644 --- a/flang/lib/Semantics/mod-file.h +++ b/flang/lib/Semantics/mod-file.h @@ -53,7 +53,8 @@ class ModFileWriter { void WriteOne(const Scope &); void Write(const Symbol &); std::string GetAsString(const Symbol &); - void PutSymbols(const Scope &); + // Returns true if a derived type with bindings and "contains" was emitted + bool PutSymbols(const Scope &); void PutSymbol(llvm::raw_ostream &, const Symbol &); void PutDerivedType(const Symbol &); void PutSubprogram(const Symbol &); diff --git a/flang/lib/Semantics/pointer-assignment.cpp b/flang/lib/Semantics/pointer-assignment.cpp index 58719deae366c..735e842411b1c 100644 --- a/flang/lib/Semantics/pointer-assignment.cpp +++ b/flang/lib/Semantics/pointer-assignment.cpp @@ -219,7 +219,7 @@ bool PointerAssignmentChecker::Check(const evaluate::Designator &d) { " derived type when target is unlimited polymorphic"_err_en_US; } } else { - if (!lhsType_->type().IsTypeCompatibleWith(rhsType->type())) { + if (!lhsType_->type().IsTkCompatibleWith(rhsType->type())) { msg = MessageFormattedText{ "Target type %s is not compatible with pointer type %s"_err_en_US, rhsType->type().AsFortran(), lhsType_->type().AsFortran()}; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index b501ac69098f9..0bdf871cd4851 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -4028,8 +4028,22 @@ void DeclarationVisitor::Post( } void DeclarationVisitor::Post(const parser::FinalProcedureStmt &x) { - for (auto &name : x.v) { - MakeTypeSymbol(name, FinalProcDetails{}); + if (currScope().IsDerivedType() && currScope().symbol()) { + if (auto *details{currScope().symbol()->detailsIf()}) { + for (const auto &subrName : x.v) { + if (const auto *name{ResolveName(subrName)}) { + auto pair{ + details->finals().emplace(name->source, DEREF(name->symbol))}; + if (!pair.second) { // C787 + Say(name->source, + "FINAL subroutine '%s' already appeared in this derived type"_err_en_US, + name->source) + .Attach(pair.first->first, + "earlier appearance of this FINAL subroutine"_en_US); + } + } + } + } } } diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index 1e046e013c8f1..06c4ac4275a08 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -228,7 +228,6 @@ std::string DetailsToString(const Details &details) { [](const ProcBindingDetails &) { return "ProcBinding"; }, [](const NamelistDetails &) { return "Namelist"; }, [](const CommonBlockDetails &) { return "CommonBlockDetails"; }, - [](const FinalProcDetails &) { return "FinalProc"; }, [](const TypeParamDetails &) { return "TypeParam"; }, [](const MiscDetails &) { return "Misc"; }, [](const AssocEntityDetails &) { return "AssocEntity"; }, @@ -436,7 +435,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Details &details) { os << ' ' << object->name(); } }, - [&](const FinalProcDetails &) {}, [&](const TypeParamDetails &x) { DumpOptional(os, "type", x.type()); os << ' ' << common::EnumToString(x.attr()); diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 848aef08e3a1f..8bcbdc70ec117 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -637,20 +637,23 @@ bool IsFinalizable(const Symbol &symbol) { } bool IsFinalizable(const DerivedTypeSpec &derived) { - ScopeComponentIterator components{derived}; - return std::find_if(components.begin(), components.end(), - [](const Symbol &x) { return x.has(); }) != - components.end(); + if (!derived.typeSymbol().get().finals().empty()) { + return true; + } + DirectComponentIterator components{derived}; + return bool{std::find_if(components.begin(), components.end(), + [](const Symbol &component) { return IsFinalizable(component); })}; } -// TODO The following function returns true for all types with FINAL procedures -// This is because we don't yet fill in the data for FinalProcDetails bool HasImpureFinal(const DerivedTypeSpec &derived) { - ScopeComponentIterator components{derived}; - return std::find_if( - components.begin(), components.end(), [](const Symbol &x) { - return x.has() && !x.attrs().test(Attr::PURE); - }) != components.end(); + if (const auto *details{ + derived.typeSymbol().detailsIf()}) { + const auto &finals{details->finals()}; + return std::any_of(finals.begin(), finals.end(), + [](const auto &x) { return !x.second->attrs().test(Attr::PURE); }); + } else { + return false; + } } bool IsCoarray(const Symbol &symbol) { return symbol.Corank() > 0; } @@ -701,10 +704,12 @@ bool IsInBlankCommon(const Symbol &symbol) { // C722 and C723: For a function to be assumed length, it must be external and // of CHARACTER type bool IsExternal(const Symbol &symbol) { - return (symbol.has() && symbol.owner().IsGlobal()) || - symbol.attrs().test(Attr::EXTERNAL); + return ClassifyProcedure(symbol) == ProcedureDefinitionClass::External; } +bool IsModuleProcedure(const Symbol &symbol) { + return ClassifyProcedure(symbol) == ProcedureDefinitionClass::Module; +} const Symbol *IsExternalInPureContext( const Symbol &symbol, const Scope &scope) { if (const auto *pureProc{FindPureProcedureContaining(scope)}) { @@ -1005,6 +1010,39 @@ const Symbol *FindSeparateModuleSubprogramInterface(const Symbol *proc) { return nullptr; } +ProcedureDefinitionClass ClassifyProcedure(const Symbol &symbol) { // 15.2.2 + const Symbol &ultimate{symbol.GetUltimate()}; + if (ultimate.attrs().test(Attr::INTRINSIC)) { + return ProcedureDefinitionClass::Intrinsic; + } else if (ultimate.attrs().test(Attr::EXTERNAL)) { + return ProcedureDefinitionClass::External; + } else if (const auto *procDetails{ultimate.detailsIf()}) { + if (procDetails->isDummy()) { + return ProcedureDefinitionClass::Dummy; + } else if (IsPointer(ultimate)) { + return ProcedureDefinitionClass::Pointer; + } + } else if (const Symbol * subp{FindSubprogram(symbol)}) { + if (const auto *subpDetails{subp->detailsIf()}) { + if (subpDetails->stmtFunction()) { + return ProcedureDefinitionClass::StatementFunction; + } + } + switch (ultimate.owner().kind()) { + case Scope::Kind::Global: + return ProcedureDefinitionClass::External; + case Scope::Kind::Module: + return ProcedureDefinitionClass::Module; + case Scope::Kind::MainProgram: + case Scope::Kind::Subprogram: + return ProcedureDefinitionClass::Internal; + default: + break; + } + } + return ProcedureDefinitionClass::None; +} + // ComponentIterator implementation template diff --git a/flang/test/Semantics/call03.f90 b/flang/test/Semantics/call03.f90 index b220325812541..28a0d29ca5058 100644 --- a/flang/test/Semantics/call03.f90 +++ b/flang/test/Semantics/call03.f90 @@ -29,7 +29,7 @@ subroutine subr01(this) class(tbp), intent(in) :: this end subroutine subroutine subr02(this) - class(final), intent(in) :: this + type(final), intent(inout) :: this end subroutine subroutine poly(x) @@ -113,7 +113,7 @@ subroutine test04 ! 15.5.2.4(2) subroutine test05 ! 15.5.2.4(2) type(final) :: x - !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have FINAL subroutine 'subr02' + !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have derived type 'final' with FINAL subroutine 'subr02' call typestar(x) end subroutine diff --git a/flang/test/Semantics/call05.f90 b/flang/test/Semantics/call05.f90 index c317d30116074..86da81d5e8f1c 100644 --- a/flang/test/Semantics/call05.f90 +++ b/flang/test/Semantics/call05.f90 @@ -89,9 +89,9 @@ subroutine test call spp(up) !ERROR: Actual argument type 'CLASS(*)' is not compatible with dummy argument type 't' call spa(ua) - !ERROR: POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type + !ERROR: POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type and kind call spp(pp2) - !ERROR: POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type + !ERROR: POINTER or ALLOCATABLE dummy and actual arguments must have the same declared type and kind call spa(pa2) !ERROR: Rank of dummy argument is 1, but actual argument has rank 2 call smp(mpmat) diff --git a/flang/test/Semantics/final01.f90 b/flang/test/Semantics/final01.f90 new file mode 100644 index 0000000000000..3f5915093dad7 --- /dev/null +++ b/flang/test/Semantics/final01.f90 @@ -0,0 +1,119 @@ +! RUN: %S/test_errors.sh %s %t %f18 +! Test FINAL subroutine constraints C786-C789 +module m1 + external :: external + intrinsic :: sin + real :: object + procedure(valid), pointer :: pointer + type :: parent(kind1, len1) + integer, kind :: kind1 = 1 + integer, len :: len1 = 1 + end type + type, extends(parent) :: child(kind2, len2) + integer, kind :: kind2 = 2 + integer, len :: len2 = 2 + contains + final :: valid +!ERROR: FINAL subroutine 'external' of derived type 'child' must be a module procedure +!ERROR: FINAL subroutine 'sin' of derived type 'child' must be a module procedure +!ERROR: FINAL subroutine 'object' of derived type 'child' must be a module procedure +!ERROR: FINAL subroutine 'pointer' of derived type 'child' must be a module procedure +!ERROR: FINAL subroutine 'func' of derived type 'child' must be a subroutine + final :: external, sin, object, pointer, func +!ERROR: FINAL subroutine 's01' of derived type 'child' must have a single dummy argument that is a data object +!ERROR: FINAL subroutine 's02' of derived type 'child' must have a single dummy argument that is a data object +!ERROR: FINAL subroutine 's03' of derived type 'child' must not have a dummy argument with INTENT(OUT) +!ERROR: FINAL subroutine 's04' of derived type 'child' must not have a dummy argument with the VALUE attribute +!ERROR: FINAL subroutine 's05' of derived type 'child' must not have a POINTER dummy argument +!ERROR: FINAL subroutine 's06' of derived type 'child' must not have an ALLOCATABLE dummy argument +!ERROR: FINAL subroutine 's07' of derived type 'child' must not have a coarray dummy argument +!ERROR: FINAL subroutine 's08' of derived type 'child' must not have a polymorphic dummy argument +!ERROR: FINAL subroutine 's09' of derived type 'child' must not have a polymorphic dummy argument +!ERROR: FINAL subroutine 's10' of derived type 'child' must not have an OPTIONAL dummy argument + final :: s01, s02, s03, s04, s05, s06, s07, s08, s09, s10 +!ERROR: FINAL subroutine 's11' of derived type 'child' must have a single dummy argument +!ERROR: FINAL subroutine 's12' of derived type 'child' must have a single dummy argument +!ERROR: FINAL subroutine 's13' of derived type 'child' must have a dummy argument with an assumed LEN type parameter 'len1=*' +!ERROR: FINAL subroutine 's13' of derived type 'child' must have a dummy argument with an assumed LEN type parameter 'len2=*' +!ERROR: FINAL subroutine 's14' of derived type 'child' must have a dummy argument with an assumed LEN type parameter 'len2=*' +!ERROR: FINAL subroutine 's15' of derived type 'child' must have a dummy argument with an assumed LEN type parameter 'len1=*' +!ERROR: FINAL subroutine 's16' of derived type 'child' must not have a polymorphic dummy argument +!ERROR: FINAL subroutine 's17' of derived type 'child' must have a TYPE(child) dummy argument + final :: s11, s12, s13, s14, s15, s16, s17 +!ERROR: FINAL subroutine 'valid' already appeared in this derived type + final :: valid +!ERROR: FINAL subroutines 'valid2' and 'valid' of derived type 'child' cannot be distinguished by rank or KIND type parameter value + final :: valid2 + end type + contains + subroutine valid(x) + type(child(len1=*, len2=*)), intent(inout) :: x + end subroutine + subroutine valid2(x) + type(child(len1=*, len2=*)), intent(inout) :: x + end subroutine + real function func(x) + type(child(len1=*, len2=*)), intent(inout) :: x + func = 0. + end function + subroutine s01(*) + end subroutine + subroutine s02(x) + external :: x + end subroutine + subroutine s03(x) + type(child(kind1=3, len1=*, len2=*)), intent(out) :: x + end subroutine + subroutine s04(x) + type(child(kind1=4, len1=*, len2=*)), value :: x + end subroutine + subroutine s05(x) + type(child(kind1=5, len1=*, len2=*)), pointer :: x + end subroutine + subroutine s06(x) + type(child(kind1=6, len1=*, len2=*)), allocatable :: x + end subroutine + subroutine s07(x) + type(child(kind1=7, len1=*, len2=*)) :: x[*] + end subroutine + subroutine s08(x) + class(child(kind1=8, len1=*, len2=*)) :: x + end subroutine + subroutine s09(x) + class(*) :: x + end subroutine + subroutine s10(x) + type(child(kind1=10, len1=*, len2=*)), optional :: x + end subroutine + subroutine s11(x, y) + type(child(kind1=11, len1=*, len2=*)) :: x, y + end subroutine + subroutine s12 + end subroutine + subroutine s13(x) + type(child(kind1=13)) :: x + end subroutine + subroutine s14(x) + type(child(kind1=14, len1=*,len2=2)) :: x + end subroutine + subroutine s15(x) + type(child(kind1=15, len2=*)) :: x + end subroutine + subroutine s16(x) + type(*) :: x + end subroutine + subroutine s17(x) + type(parent(kind1=17, len1=*)) :: x + end subroutine + subroutine nested + type :: t + contains +!ERROR: FINAL subroutine 'internal' of derived type 't' must be a module procedure + final :: internal + end type + contains + subroutine internal(x) + type(t), intent(inout) :: x + end subroutine + end subroutine +end module diff --git a/flang/test/Semantics/modfile10.f90 b/flang/test/Semantics/modfile10.f90 index 2949ab6965dc8..ef10f1f23e8e2 100644 --- a/flang/test/Semantics/modfile10.f90 +++ b/flang/test/Semantics/modfile10.f90 @@ -64,8 +64,8 @@ subroutine test ! type::t2 ! integer(4)::x ! contains -! final::c ! procedure,non_overridable,private::d +! final::c ! end type ! type,abstract::t2a ! contains diff --git a/flang/test/Semantics/resolve32.f90 b/flang/test/Semantics/resolve32.f90 index d06eede6ced5a..326ae1f909cf0 100644 --- a/flang/test/Semantics/resolve32.f90 +++ b/flang/test/Semantics/resolve32.f90 @@ -57,7 +57,7 @@ subroutine foo contains procedure, nopass :: b => s final :: f - !ERROR: Type parameter, component, or procedure binding 'i' already defined in this type + !ERROR: FINAL subroutine 'i' of derived type 't2' must be a module procedure final :: i end type type t3 diff --git a/flang/test/Semantics/resolve55.f90 b/flang/test/Semantics/resolve55.f90 index 9e61265430043..48af4abcf28ba 100644 --- a/flang/test/Semantics/resolve55.f90 +++ b/flang/test/Semantics/resolve55.f90 @@ -36,25 +36,24 @@ subroutine s4(arg) end do end subroutine s4 -subroutine s5() +module m ! Cannot have a variable of a finalizable type in a locality spec type t1 integer :: i contains final :: f end type t1 - - type(t1) :: var - -!ERROR: Finalizable variable 'var' not allowed in a locality-spec - do concurrent(i=1:5) local(var) - end do - -contains + contains + subroutine s5() + type(t1) :: var + !ERROR: Finalizable variable 'var' not allowed in a locality-spec + do concurrent(i=1:5) local(var) + end do + end subroutine s5 subroutine f(x) type(t1) :: x end subroutine f -end subroutine s5 +end module m subroutine s6 ! Cannot have a nonpointer polymorphic dummy argument in a locality spec From 55cff5b288650f0ce814c3c85041852bbed554b8 Mon Sep 17 00:00:00 2001 From: Joachim Protze Date: Wed, 30 Sep 2020 10:40:51 +0200 Subject: [PATCH 215/544] [OpenMP][libomptarget] make omp_get_initial_device 5.1 compliant OpenMP 5.1 defines omp_get_initial_device to return the same value as omp_get_num_devices. Since this change is also 5.0 compliant, no versioning is needed. Differential Revision: https://reviews.llvm.org/D88149 --- openmp/libomptarget/include/omptarget.h | 1 - openmp/libomptarget/src/api.cpp | 5 +++-- openmp/runtime/src/kmp.h | 1 - openmp/runtime/src/kmp_ftn_entry.h | 10 ++++++---- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h index 11d112159dc76..9e7c28b14f8b4 100644 --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -21,7 +21,6 @@ #define OFFLOAD_FAIL (~0) #define OFFLOAD_DEVICE_DEFAULT -1 -#define HOST_DEVICE -10 /// Data attributes for each data reference used in an OpenMP target region. enum tgt_map_type { diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 7e5f49a8b3987..5155246a9ea2c 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -29,8 +29,9 @@ EXTERN int omp_get_num_devices(void) { } EXTERN int omp_get_initial_device(void) { - DP("Call to omp_get_initial_device returning %d\n", HOST_DEVICE); - return HOST_DEVICE; + int hostDevice = omp_get_num_devices(); + DP("Call to omp_get_initial_device returning %d\n", hostDevice); + return hostDevice; } EXTERN void *omp_target_alloc(size_t size, int device_num) { diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 52276ebca41f5..e78e3b9c7df3f 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3876,7 +3876,6 @@ extern int __kmpc_get_target_offload(); // Constants used in libomptarget #define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device. -#define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure. #define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices". // OMP Pause Resource diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index b4b0dea0d1afa..de9156ddc4810 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -966,13 +966,15 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) { int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE_EXTERNAL; int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) { #if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB) - return KMP_HOST_DEVICE; + // same as omp_get_num_devices() + return 0; #else int (*fptr)(); if ((*(void **)(&fptr) = dlsym(RTLD_NEXT, "omp_get_initial_device"))) { return (*fptr)(); } else { // liboffload & libomptarget don't exist - return KMP_HOST_DEVICE; + // same as omp_get_num_devices() + return 0; } #endif } @@ -1319,14 +1321,14 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) { // loaded, we assume we are on the host and return KMP_HOST_DEVICE. // Compiler/libomptarget will handle this if called inside target. int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL; -int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return KMP_HOST_DEVICE; } +int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return FTN_GET_INITIAL_DEVICE(); } // Compiler will ensure that this is only called from host in sequential region int FTN_STDCALL FTN_PAUSE_RESOURCE(kmp_pause_status_t kind, int device_num) { #ifdef KMP_STUB return 1; // just fail #else - if (device_num == KMP_HOST_DEVICE) + if (device_num == FTN_GET_INITIAL_DEVICE()) return __kmpc_pause_resource(kind); else { #if !KMP_OS_WINDOWS From 6104b30446aa976006fd322af4a57a8f0124f94f Mon Sep 17 00:00:00 2001 From: Joachim Protze Date: Thu, 1 Oct 2020 00:53:41 +0200 Subject: [PATCH 216/544] [OpenMP][OMPT] Update OMPT tests for newly added GOMP interface patches This patch updates the expected results for the GOMP interface patches: D87267, D87269, and D87271. The taskwait-depend test is changed to really use taskwait-depend and copied to an task_if0-depend test. To pass the tests, the handling of the return address was fixed. Differential Revision: https://reviews.llvm.org/D87680 --- openmp/runtime/src/kmp_gsupport.cpp | 3 + openmp/runtime/src/kmp_taskdeps.cpp | 5 +- openmp/runtime/src/ompt-specific.h | 5 ++ .../ompt/tasks/dependences_mutexinoutset.c | 6 +- .../runtime/test/ompt/tasks/task_if0-depend.c | 75 +++++++++++++++++++ .../runtime/test/ompt/tasks/taskwait-depend.c | 14 ++-- 6 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 openmp/runtime/test/ompt/tasks/task_if0-depend.c diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 4312e25499d85..0909070dbe02f 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -1891,6 +1891,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) { kmp_depend_info_t dep_list[ndeps]; for (kmp_int32 i = 0; i < ndeps; i++) dep_list[i] = gomp_depends.get_kmp_depend(i); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL); KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid)); } diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index bf4865dd540ed..77148d5ec4286 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -520,7 +520,6 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, #if OMPT_SUPPORT if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); if (!current_task->ompt_task_info.frame.enter_frame.ptr) current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); @@ -531,7 +530,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, current_task ? &(current_task->ompt_task_info.frame) : NULL, &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1, - OMPT_LOAD_RETURN_ADDRESS(gtid)); + OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)); } new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); @@ -700,7 +699,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, current_task ? &(current_task->ompt_task_info.frame) : NULL, taskwait_task_data, ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1, - OMPT_GET_RETURN_ADDRESS(0)); + OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)); } } diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h index fa5c5662c6499..8c54a79782842 100644 --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -81,6 +81,11 @@ inline void *__ompt_load_return_address(int gtid) { __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ __builtin_return_address(0) #define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) +#define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \ + ((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ + __kmp_threads[gtid]->th.ompt_thread_info.return_address)? \ + __ompt_load_return_address(gtid): \ + __builtin_return_address(0)) //****************************************************************************** // inline functions diff --git a/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c b/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c index f2ecea347fce9..1953682b07fd3 100644 --- a/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c +++ b/openmp/runtime/test/ompt/tasks/dependences_mutexinoutset.c @@ -1,10 +1,10 @@ // RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s // REQUIRES: ompt -// GCC does not pass in mutexinoutset -// clang 9 introduced codegen for mutexinoutset +// GCC 9 introduced codegen for mutexinoutset +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8 -// UNSUPPORTED: gcc +// clang 9 introduced codegen for mutexinoutset // UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8 #include "callback.h" diff --git a/openmp/runtime/test/ompt/tasks/task_if0-depend.c b/openmp/runtime/test/ompt/tasks/task_if0-depend.c new file mode 100644 index 0000000000000..2ecbf02faf449 --- /dev/null +++ b/openmp/runtime/test/ompt/tasks/task_if0-depend.c @@ -0,0 +1,75 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +int main() { + int x = 0; +#pragma omp parallel num_threads(2) + { +#pragma omp master + { + print_ids(0); + printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value, + &x); +#pragma omp task depend(out : x) + { x++; } + print_fuzzy_address(1); +#pragma omp task if (0) depend(in : x) + {} + print_fuzzy_address(2); + } + } + + return 0; +} + +// Check if libomp supports the callbacks for this test. +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + +// make sure initial data pointers are null +// CHECK-NOT: 0: new_task_data initially not null + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], +// CHECK-SAME: reenter_frame=[[NULL]] + +// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred| +// CHECK-SAME: ompt_task_mergeable=1207959556, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_in)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[SECOND_TASK]] + +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] diff --git a/openmp/runtime/test/ompt/tasks/taskwait-depend.c b/openmp/runtime/test/ompt/tasks/taskwait-depend.c index 38e416e70e32f..e62ad70f26ce8 100644 --- a/openmp/runtime/test/ompt/tasks/taskwait-depend.c +++ b/openmp/runtime/test/ompt/tasks/taskwait-depend.c @@ -1,9 +1,13 @@ // RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s // REQUIRES: ompt -// The GOMP wrapper does not handle `task if(0) depend()` and drops the -// dependency. Once this is fixed, reevaluate the GCC status: -// XFAIL: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9, gcc-10 +// taskwait with depend clause was introduced with gcc-9 +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8 + +// clang does not yet support taskwait with depend clause +// clang-12 introduced parsing, but no codegen +// update expected result when codegen in clang was added +// XFAIL: clang #include "callback.h" #include @@ -20,9 +24,7 @@ int main() { #pragma omp task depend(out : x) { x++; } print_fuzzy_address(1); - //#pragma omp taskwait depend(in: x) <-- currently not supported in clang -#pragma omp task if (0) depend(in : x) - {} + #pragma omp taskwait depend(in: x) print_fuzzy_address(2); } } From 21cf2e6c263d7a50654653bce4e83ab463fae580 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 30 Sep 2020 16:05:17 -0700 Subject: [PATCH 217/544] Handle unknown OSes in DarwinTargetInfo::getExnObjectAlignment rdar://problem/69727650 --- clang/lib/Basic/Targets/OSTargets.h | 3 ++- clang/test/SemaCXX/warn-overaligned-type-thrown.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index e070676930544..9b96690f413cb 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -154,7 +154,8 @@ class LLVM_LIBRARY_VISIBILITY DarwinTargetInfo : public OSTargetInfo { MinVersion = llvm::VersionTuple(5U); break; default: - llvm_unreachable("Unexpected OS"); + // Conservatively return 8 bytes if OS is unknown. + return 64; } unsigned Major, Minor, Micro; diff --git a/clang/test/SemaCXX/warn-overaligned-type-thrown.cpp b/clang/test/SemaCXX/warn-overaligned-type-thrown.cpp index d7468445f8b79..9f2386ddc3c61 100644 --- a/clang/test/SemaCXX/warn-overaligned-type-thrown.cpp +++ b/clang/test/SemaCXX/warn-overaligned-type-thrown.cpp @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple arm64-apple-tvos10 -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions -DUNDERALIGNED %s // RUN: %clang_cc1 -triple arm64-apple-watchos4 -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions -DUNDERALIGNED %s // RUN: %clang_cc1 -triple arm-linux-gnueabi -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions -DUNDERALIGNED %s +// RUN: %clang_cc1 -triple thumbv7em-apple-unknown-macho -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions -DUNDERALIGNED %s // RUN: %clang_cc1 -triple x86_64-apple-macosx10.14 -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions %s // RUN: %clang_cc1 -triple arm64-apple-ios12 -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions %s // RUN: %clang_cc1 -triple arm64-apple-tvos12 -verify -fsyntax-only -std=c++11 -fcxx-exceptions -fexceptions %s From 66d2e3f495948412602db4507359b4612639e523 Mon Sep 17 00:00:00 2001 From: Ahsan Saghir Date: Tue, 29 Sep 2020 09:40:38 -0500 Subject: [PATCH 218/544] [PowerPC] Add outer product instructions for MMA This patch adds outer product instructions for MMA, including related infrastructure, and their tests. Depends on D84968. Reviewed By: #powerpc, bsaleil, amyk Differential Revision: https://reviews.llvm.org/D88043 --- .../Target/PowerPC/AsmParser/PPCAsmParser.cpp | 5 + .../PowerPC/Disassembler/PPCDisassembler.cpp | 9 + .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 10 + .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.h | 3 + llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 513 ++++++++++++++++++ .../PowerPC/ppc64-encoding-ISA31.txt | 174 ++++++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s | 232 ++++++++ 7 files changed, 946 insertions(+) diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index a666d28eabf03..5d0f93704fc9f 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -528,6 +528,11 @@ struct PPCOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); } + void addRegVSRpEvenRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(CRBITRegs[getCRBit()])); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 38e05414bf01b..cc9ffc94ead97 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -212,6 +212,15 @@ static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm, return MCDisassembler::Success; } +static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo & 1) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1])); + return MCDisassembler::Success; +} + static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the memri field (imm, reg), which has the low 16-bits as the diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index d431628adee10..5f0769fd21f9d 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -94,6 +94,16 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, return 0; } +unsigned +PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo).isReg() && "Operand should be a register"); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) + << 1; + return RegBits; +} + unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index 4504cc6a7405e..347e163c9515e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -93,6 +93,9 @@ class PPCMCCodeEmitter : public MCCodeEmitter { unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + unsigned getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 6bdd07f346cb0..4ca03298ba883 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -647,6 +647,185 @@ class XForm_AT3 opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL, let Inst{31} = 0; } +class XX3Form_AT3_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : I { + bits<3> AT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = AT; + let Inst{9-10} = 0; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class MMIRR_XX3Form_XY4P2_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + bits<2> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-17} = PMSK; + let Inst{18-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_XY4_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_X4Y2_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<2> YMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-29} = YMSK; + let Inst{30-31} = 0; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_XY4P8_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + bits<8> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-23} = PMSK; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +class MMIRR_XX3Form_XYP4_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XA; + bits<6> XB; + bits<4> XMSK; + bits<4> YMSK; + bits<4> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-19} = PMSK; + let Inst{20-23} = 0; + let Inst{24-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">; def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">; def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">; @@ -802,6 +981,286 @@ let Predicates = [PrefixInstrs] in { } } +// Multiclass definitions for MMA accumulator instructions. +// ---------------------------------------------------------------------------- + +// Defines 2 unmasked instructions where the xo field for acc/non-acc version +// is even/odd. +multiclass ACC_UM_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [MMA] in { + def NAME : + XX3Form_AT3_XAB6, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M844_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P8_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P8_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M444_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XYP4_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XYP4_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M244_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. +// Upper nibble of XO field for acc/non-acc version is 0x4/0x6. +multiclass ACC_UM_M244_XO46 opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [MMA] in { + def NAME : + XX3Form_AT3_XAB6, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAB6< + opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, xo, (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x20), (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4 +// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M244_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_UM_M244_XOEO; + let Predicates = [MMA] in { + def PN : XX3Form_AT3_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME#PN : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 5 instructions, unmasked, operand negating. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA] in { + def PN : XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M44_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_NEG_UM_XOM84C; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M42_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_NEG_UM_XOM84C; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// End of class definitions. +//----------------------------------------------------------------------------- + let Predicates = [MMA] in { def XXMFACC : XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", @@ -824,8 +1283,62 @@ let Predicates = [MMA] in { XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, []>; } + def XVI8GER4SPP : + XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), + "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; +} + +let Predicates = [MMA, PrefixInstrs] in { + def PMXVI8GER4SPP : + MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), + (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, + u4imm:$YMSK, u4imm:$PMSK), + "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", + IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; } +// MMA accumulating/non-accumulating instructions. +//------------------------------------------------------------------------------ + +// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN +// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN +defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB), + "xvbf16ger2", "$AT, $XA, $XB">; + +// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP +defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB), + "xvi4ger8", "$AT, $XA, $XB">; + +// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP +defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB), + "xvi8ger4", "$AT, $XA, $XB">; + +// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP +defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB), + "xvi16ger2", "$AT, $XA, $XB">; + +// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP +defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB), + "xvi16ger2s", "$AT, $XA, $XB">; + +// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN +// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN +defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB), + "xvf16ger2", "$AT, $XA, $XB">; + +// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP +// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP +defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB), + "xvf32ger", "$AT, $XA, $XB">; + +// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN +// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN +defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB), + "xvf64ger", "$AT, $XA, $XB">; +//------------------------------------------------------------------------------ + def Concats { dag VecsToVecPair0 = (v256i1 (INSERT_SUBREG diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt index e5d7d306b48ab..edbeeaf8826d8 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt @@ -34,6 +34,180 @@ # CHECK: xxsetaccz 0 0x7c 0x03 0x01 0x62 +# CHECK: pmxvf16ger2 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x10 0x98 + +# CHECK: pmxvf16ger2pp 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x10 0x90 + +# CHECK: pmxvf16ger2pn 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x14 0x90 + +# CHECK: pmxvf16ger2np 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x12 0x90 + +# CHECK: pmxvf16ger2nn 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x16 0x90 + +# CHECK: pmxvf32ger 0, 1, 2, 4, 4 +0x07 0x90 0x00 0x44 0xec 0x01 0x10 0xd8 + +# CHECK: pmxvf32gerpp 0, 1, 2, 4, 4 +0x07 0x90 0x00 0x44 0xec 0x01 0x10 0xd0 + +# CHECK: pmxvf32gerpn 0, 1, 2, 4, 4 +0x07 0x90 0x00 0x44 0xec 0x01 0x14 0xd0 + +# CHECK: pmxvf32gernp 0, 1, 2, 4, 4 +0x07 0x90 0x00 0x44 0xec 0x01 0x12 0xd0 + +# CHECK: pmxvf32gernn 0, 1, 2, 4, 4 +0x07 0x90 0x00 0x44 0xec 0x01 0x16 0xd0 + +# CHECK: pmxvf64ger 0, 0, 2, 4, 3 +0x07 0x90 0x00 0x4c 0xec 0x00 0x11 0xd8 + +# CHECK: pmxvf64gerpp 0, 2, 2, 4, 3 +0x07 0x90 0x00 0x4c 0xec 0x02 0x11 0xd0 + +# CHECK: pmxvf64gerpn 0, 4, 2, 4, 3 +0x07 0x90 0x00 0x4c 0xec 0x04 0x15 0xd0 + +# CHECK: pmxvf64gernp 0, 62, 2, 4, 3 +0x07 0x90 0x00 0x4c 0xec 0x1e 0x13 0xd4 + +# CHECK: pmxvf64gernn 0, 30, 2, 4, 3 +0x07 0x90 0x00 0x4c 0xec 0x1e 0x17 0xd0 + +# CHECK: pmxvi4ger8 0, 1, 2, 4, 4, 4 +0x07 0x90 0x04 0x44 0xec 0x01 0x11 0x18 + +# CHECK: pmxvi4ger8pp 0, 1, 2, 4, 4, 4 +0x07 0x90 0x04 0x44 0xec 0x01 0x11 0x10 + +# CHECK: pmxvi8ger4 0, 1, 2, 4, 4, 4 +0x07 0x90 0x40 0x44 0xec 0x01 0x10 0x18 + +# CHECK: pmxvi8ger4pp 0, 1, 2, 4, 4, 4 +0x07 0x90 0x40 0x44 0xec 0x01 0x10 0x10 + +# CHECK: pmxvi16ger2s 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x11 0x58 + +# CHECK: pmxvi16ger2spp 0, 1, 2, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x01 0x11 0x50 + +# CHECK: xvf16ger2 0, 1, 2 +0xec 0x01 0x10 0x98 + +# CHECK: xvf16ger2pp 0, 1, 2 +0xec 0x01 0x10 0x90 + +# CHECK: xvf16ger2pn 0, 1, 2 +0xec 0x01 0x14 0x90 + +# CHECK: xvf16ger2np 0, 1, 2 +0xec 0x01 0x12 0x90 + +# CHECK: xvf16ger2nn 0, 1, 2 +0xec 0x01 0x16 0x90 + +# CHECK: xvf32ger 0, 1, 2 +0xec 0x01 0x10 0xd8 + +# CHECK: xvf32gerpp 0, 1, 2 +0xec 0x01 0x10 0xd0 + +# CHECK: xvf32gerpn 0, 1, 2 +0xec 0x01 0x14 0xd0 + +# CHECK: xvf32gernp 0, 1, 2 +0xec 0x01 0x12 0xd0 + +# CHECK: xvf32gernn 0, 1, 2 +0xec 0x01 0x16 0xd0 + +# CHECK: xvf64ger 0, 0, 2 +0xec 0x00 0x11 0xd8 + +# CHECK: xvf64gerpp 0, 2, 2 +0xec 0x02 0x11 0xd0 + +# CHECK: xvf64gerpn 0, 62, 2 +0xec 0x1e 0x15 0xd4 + +# CHECK: xvf64gernp 0, 0, 2 +0xec 0x00 0x13 0xd0 + +# CHECK: xvf64gernn 0, 0, 2 +0xec 0x00 0x17 0xd0 + +# CHECK: xvi4ger8 0, 1, 2 +0xec 0x01 0x11 0x18 + +# CHECK: xvi4ger8pp 0, 1, 2 +0xec 0x01 0x11 0x10 + +# CHECK: xvi8ger4 0, 1, 2 +0xec 0x01 0x10 0x18 + +# CHECK: xvi8ger4pp 0, 1, 2 +0xec 0x01 0x10 0x10 + +# CHECK: xvi16ger2s 0, 1, 2 +0xec 0x01 0x11 0x58 + +# CHECK: xvi16ger2spp 0, 1, 2 +0xec 0x01 0x11 0x50 + +# CHECK: xvbf16ger2 2, 33, 34 +0xed 0x01 0x11 0x9e + +# CHECK: xvbf16ger2pp 1, 33, 34 +0xec 0x81 0x11 0x96 + +# CHECK: xvbf16ger2pn 2, 33, 34 +0xed 0x01 0x15 0x96 + +# CHECK: xvbf16ger2np 1, 33, 34 +0xec 0x81 0x13 0x96 + +# CHECK: xvbf16ger2nn 2, 33, 34 +0xed 0x01 0x17 0x96 + +# CHECK: pmxvbf16ger2 2, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xed 0x01 0x11 0x9e + +# CHECK: pmxvbf16ger2pp 1, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x81 0x11 0x96 + +# CHECK: pmxvbf16ger2pn 2, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xed 0x01 0x15 0x96 + +# CHECK: pmxvbf16ger2np 1, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x81 0x13 0x96 + +# CHECK: pmxvbf16ger2nn 2, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xed 0x01 0x17 0x96 + +# CHECK: xvi8ger4spp 1, 33, 34 +0xec 0x81 0x13 0x1e + +# CHECK: xvi16ger2 1, 33, 34 +0xec 0x81 0x12 0x5e + +# CHECK: xvi16ger2pp 1, 33, 34 +0xec 0x81 0x13 0x5e + +# CHECK: pmxvi8ger4spp 1, 33, 34, 4, 4, 8 +0x07 0x90 0x80 0x44 0xec 0x81 0x13 0x1e + +# CHECK: pmxvi16ger2 1, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x81 0x12 0x5e + +# CHECK: pmxvi16ger2pp 1, 33, 34, 4, 4, 2 +0x07 0x90 0x80 0x44 0xec 0x81 0x13 0x5e + # CHECK: lxvp 2, 32(4) 0x18 0x44 0x00 0x20 diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s index 1abed2d031c2d..7ff7e02edafd7 100644 --- a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s +++ b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s @@ -32,6 +32,238 @@ # CHECK-BE: xxsetaccz 1 # encoding: [0x7c,0x83,0x01,0x62] # CHECK-LE: xxsetaccz 1 # encoding: [0x62,0x01,0x83,0x7c] xxsetaccz 1 +# CHECK-BE: pmxvf16ger2 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x10,0x98] +# CHECK-LE: pmxvf16ger2 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x98,0x10,0x01,0xec] + pmxvf16ger2 0, 1, 2, 4, 4, 2 +# CHECK-BE: pmxvf16ger2pp 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x10,0x90] +# CHECK-LE: pmxvf16ger2pp 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x90,0x10,0x01,0xec + pmxvf16ger2pp 0, 1, 2, 4, 4, 2 +# CHECK-BE: pmxvf16ger2pn 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x14,0x90] +# CHECK-LE: pmxvf16ger2pn 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x90,0x14,0x01,0xec] + pmxvf16ger2pn 0, 1, 2, 4, 4, 2 +# CHECK-BE: pmxvf16ger2np 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x12,0x90] +# CHECK-LE: pmxvf16ger2np 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x90,0x12,0x01,0xec] + pmxvf16ger2np 0, 1, 2, 4, 4, 2 +# CHECK-BE: pmxvf16ger2nn 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x16,0x90] +# CHECK-LE: pmxvf16ger2nn 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x90,0x16,0x01,0xec] + pmxvf16ger2nn 0, 1, 2, 4, 4, 2 +# CHECK-BE: pmxvf32ger 0, 1, 2, 4, 4 # encoding: [0x07,0x90,0x00,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x10,0xd8] +# CHECK-LE: pmxvf32ger 0, 1, 2, 4, 4 # encoding: [0x44,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd8,0x10,0x01,0xec] + pmxvf32ger 0, 1, 2, 4, 4 +# CHECK-BE: pmxvf32gerpp 0, 1, 2, 4, 4 # encoding: [0x07,0x90,0x00,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x10,0xd0] +# CHECK-LE: pmxvf32gerpp 0, 1, 2, 4, 4 # encoding: [0x44,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd0,0x10,0x01,0xec] + pmxvf32gerpp 0, 1, 2, 4, 4 +# CHECK-BE: pmxvf32gerpn 0, 1, 2, 4, 4 # encoding: [0x07,0x90,0x00,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x14,0xd0] +# CHECK-LE: pmxvf32gerpn 0, 1, 2, 4, 4 # encoding: [0x44,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd0,0x14,0x01,0xec] + pmxvf32gerpn 0, 1, 2, 4, 4 +# CHECK-BE: pmxvf32gernp 0, 1, 2, 4, 4 # encoding: [0x07,0x90,0x00,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x12,0xd0] +# CHECK-LE: pmxvf32gernp 0, 1, 2, 4, 4 # encoding: [0x44,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd0,0x12,0x01,0xec] + pmxvf32gernp 0, 1, 2, 4, 4 +# CHECK-BE: pmxvf32gernn 0, 1, 2, 4, 4 # encoding: [0x07,0x90,0x00,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x16,0xd0] +# CHECK-LE: pmxvf32gernn 0, 1, 2, 4, 4 # encoding: [0x44,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd0,0x16,0x01,0xec] + pmxvf32gernn 0, 1, 2, 4, 4 +# CHECK-BE: pmxvf64ger 0, 0, 2, 4, 3 # encoding: [0x07,0x90,0x00,0x4c, +# CHECK-BE-SAME: 0xec,0x00,0x11,0xd8] +# CHECK-LE: pmxvf64ger 0, 0, 2, 4, 3 # encoding: [0x4c,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd8,0x11,0x00,0xec] + pmxvf64ger 0, 0, 2, 4, 3 +# CHECK-BE: pmxvf64gerpp 0, 2, 2, 4, 3 # encoding: [0x07,0x90,0x00,0x4c, +# CHECK-BE-SAME: 0xec,0x02,0x11,0xd0] +# CHECK-LE: pmxvf64gerpp 0, 2, 2, 4, 3 # encoding: [0x4c,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd0,0x11,0x02,0xec] + pmxvf64gerpp 0, 2, 2, 4, 3 +# CHECK-BE: pmxvf64gerpn 0, 4, 2, 4, 3 # encoding: [0x07,0x90,0x00,0x4c, +# CHECK-BE-SAME: 0xec,0x04,0x15,0xd0] +# CHECK-LE: pmxvf64gerpn 0, 4, 2, 4, 3 # encoding: [0x4c,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd0,0x15,0x04,0xec] + pmxvf64gerpn 0, 4, 2, 4, 3 +# CHECK-BE: pmxvf64gernp 0, 32, 2, 4, 3 # encoding: [0x07,0x90,0x00,0x4c, +# CHECK-BE-SAME: 0xec,0x00,0x13,0xd4] +# CHECK-LE: pmxvf64gernp 0, 32, 2, 4, 3 # encoding: [0x4c,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd4,0x13,0x00,0xec] + pmxvf64gernp 0, 32, 2, 4, 3 +# CHECK-BE: pmxvf64gernn 0, 62, 2, 4, 3 # encoding: [0x07,0x90,0x00,0x4c, +# CHECK-BE-SAME: 0xec,0x1e,0x17,0xd4] +# CHECK-LE: pmxvf64gernn 0, 62, 2, 4, 3 # encoding: [0x4c,0x00,0x90,0x07, +# CHECK-LE-SAME: 0xd4,0x17,0x1e,0xec] + pmxvf64gernn 0, 62, 2, 4, 3 +# CHECK-BE: pmxvi4ger8 0, 1, 2, 4, 4, 4 # encoding: [0x07,0x90,0x04,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x11,0x18] +# CHECK-LE: pmxvi4ger8 0, 1, 2, 4, 4, 4 # encoding: [0x44,0x04,0x90,0x07 +# CHECK-LE-SAME: 0x18,0x11,0x01,0xec] + pmxvi4ger8 0, 1, 2, 4, 4, 4 +# CHECK-BE: pmxvi4ger8pp 0, 1, 2, 4, 4, 4 # encoding: [0x07,0x90,0x04,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x11,0x10] +# CHECK-LE: pmxvi4ger8pp 0, 1, 2, 4, 4, 4 # encoding: [0x44,0x04,0x90,0x07 +# CHECK-LE-SAME: 0x10,0x11,0x01,0xec] + pmxvi4ger8pp 0, 1, 2, 4, 4, 4 +# CHECK-BE: pmxvi8ger4 0, 1, 2, 4, 4, 4 # encoding: [0x07,0x90,0x40,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x10,0x18] +# CHECK-LE: pmxvi8ger4 0, 1, 2, 4, 4, 4 # encoding: [0x44,0x40,0x90,0x07, +# CHECK-LE-SAME: 0x18,0x10,0x01,0xec] + pmxvi8ger4 0, 1, 2, 4, 4, 4 +# CHECK-BE: pmxvi8ger4pp 0, 1, 2, 4, 4, 4 # encoding: [0x07,0x90,0x40,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x10,0x10] +# CHECK-LE: pmxvi8ger4pp 0, 1, 2, 4, 4, 4 # encoding: [0x44,0x40,0x90,0x07, +# CHECK-LE-SAME: 0x10,0x10,0x01,0xec] + pmxvi8ger4pp 0, 1, 2, 4, 4, 4 +# CHECK-BE: pmxvi16ger2s 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x11,0x58] +# CHECK-LE: pmxvi16ger2s 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x58,0x11,0x01,0xec] + pmxvi16ger2s 0, 1, 2, 4, 4, 2 +# CHECK-BE: pmxvi16ger2spp 0, 1, 2, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x01,0x11,0x50] +# CHECK-LE: pmxvi16ger2spp 0, 1, 2, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x50,0x11,0x01,0xec] + pmxvi16ger2spp 0, 1, 2, 4, 4, 2 +# CHECK-BE: xvf16ger2 0, 1, 2 # encoding: [0xec,0x01,0x10,0x98] +# CHECK-LE: xvf16ger2 0, 1, 2 # encoding: [0x98,0x10,0x01,0xec] + xvf16ger2 0, 1, 2 +# CHECK-BE: xvf16ger2pp 0, 1, 2 # encoding: [0xec,0x01,0x10,0x90] +# CHECK-LE: xvf16ger2pp 0, 1, 2 # encoding: [0x90,0x10,0x01,0xec] + xvf16ger2pp 0, 1, 2 +# CHECK-BE: xvf16ger2pn 0, 1, 2 # encoding: [0xec,0x01,0x14,0x90] +# CHECK-LE: xvf16ger2pn 0, 1, 2 # encoding: [0x90,0x14,0x01,0xec] + xvf16ger2pn 0, 1, 2 +# CHECK-BE: xvf16ger2np 0, 1, 2 # encoding: [0xec,0x01,0x12,0x90] +# CHECK-LE: xvf16ger2np 0, 1, 2 # encoding: [0x90,0x12,0x01,0xec] + xvf16ger2np 0, 1, 2 +# CHECK-BE: xvf16ger2nn 0, 1, 2 # encoding: [0xec,0x01,0x16,0x90] +# CHECK-LE: xvf16ger2nn 0, 1, 2 # encoding: [0x90,0x16,0x01,0xec] + xvf16ger2nn 0, 1, 2 +# CHECK-BE: xvf32ger 0, 1, 2 # encoding: [0xec,0x01,0x10,0xd8] +# CHECK-LE: xvf32ger 0, 1, 2 # encoding: [0xd8,0x10,0x01,0xec] + xvf32ger 0, 1, 2 +# CHECK-BE: xvf32gerpp 0, 1, 2 # encoding: [0xec,0x01,0x10,0xd0] +# CHECK-LE: xvf32gerpp 0, 1, 2 # encoding: [0xd0,0x10,0x01,0xec] + xvf32gerpp 0, 1, 2 +# CHECK-BE: xvf32gerpn 0, 1, 2 # encoding: [0xec,0x01,0x14,0xd0] +# CHECK-LE: xvf32gerpn 0, 1, 2 # encoding: [0xd0,0x14,0x01,0xec] + xvf32gerpn 0, 1, 2 +# CHECK-BE: xvf32gernp 0, 1, 2 # encoding: [0xec,0x01,0x12,0xd0] +# CHECK-LE: xvf32gernp 0, 1, 2 # encoding: [0xd0,0x12,0x01,0xec] + xvf32gernp 0, 1, 2 +# CHECK-BE: xvf32gernn 0, 1, 2 # encoding: [0xec,0x01,0x16,0xd0] +# CHECK-LE: xvf32gernn 0, 1, 2 # encoding: [0xd0,0x16,0x01,0xec] + xvf32gernn 0, 1, 2 +# CHECK-BE: xvf64ger 0, 2, 2 # encoding: [0xec,0x02,0x11,0xd8] +# CHECK-LE: xvf64ger 0, 2, 2 # encoding: [0xd8,0x11,0x02,0xec] + xvf64ger 0, 2, 2 +# CHECK-BE: xvf64gerpp 0, 0, 2 # encoding: [0xec,0x00,0x11,0xd0] +# CHECK-LE: xvf64gerpp 0, 0, 2 # encoding: [0xd0,0x11,0x00,0xec] + xvf64gerpp 0, 0, 2 +# CHECK-BE: xvf64gerpn 0, 4, 2 # encoding: [0xec,0x04,0x15,0xd0] +# CHECK-LE: xvf64gerpn 0, 4, 2 # encoding: [0xd0,0x15,0x04,0xec] + xvf64gerpn 0, 4, 2 +# CHECK-BE: xvf64gernp 0, 62, 2 # encoding: [0xec,0x1e,0x13,0xd4] +# CHECK-LE: xvf64gernp 0, 62, 2 # encoding: [0xd4,0x13,0x1e,0xec] + xvf64gernp 0, 62, 2 +# CHECK-BE: xvf64gernn 0, 0, 2 # encoding: [0xec,0x00,0x17,0xd0] +# CHECK-LE: xvf64gernn 0, 0, 2 # encoding: [0xd0,0x17,0x00,0xec] + xvf64gernn 0, 0, 2 +# CHECK-BE: xvi4ger8 0, 1, 2 # encoding: [0xec,0x01,0x11,0x18] +# CHECK-LE: xvi4ger8 0, 1, 2 # encoding: [0x18,0x11,0x01,0xec] + xvi4ger8 0, 1, 2 +# CHECK-BE: xvi4ger8pp 0, 1, 2 # encoding: [0xec,0x01,0x11,0x10] +# CHECK-LE: xvi4ger8pp 0, 1, 2 # encoding: [0x10,0x11,0x01,0xec] + xvi4ger8pp 0, 1, 2 +# CHECK-BE: xvi8ger4 0, 1, 2 # encoding: [0xec,0x01,0x10,0x18] +# CHECK-LE: xvi8ger4 0, 1, 2 # encoding: [0x18,0x10,0x01,0xec] + xvi8ger4 0, 1, 2 +# CHECK-BE: xvi8ger4pp 0, 1, 2 # encoding: [0xec,0x01,0x10,0x10] +# CHECK-LE: xvi8ger4pp 0, 1, 2 # encoding: [0x10,0x10,0x01,0xec] + xvi8ger4pp 0, 1, 2 +# CHECK-BE: xvi16ger2s 0, 1, 2 # encoding: [0xec,0x01,0x11,0x58] +# CHECK-LE: xvi16ger2s 0, 1, 2 # encoding: [0x58,0x11,0x01,0xec] + xvi16ger2s 0, 1, 2 +# CHECK-BE: xvi16ger2spp 0, 1, 2 # encoding: [0xec,0x01,0x11,0x50] +# CHECK-LE: xvi16ger2spp 0, 1, 2 # encoding: [0x50,0x11,0x01,0xec] + xvi16ger2spp 0, 1, 2 +# CHECK-BE: xvbf16ger2 2, 33, 34 # encoding: [0xed,0x01,0x11,0x9e] +# CHECK-LE: xvbf16ger2 2, 33, 34 # encoding: [0x9e,0x11,0x01,0xed] + xvbf16ger2 2, 33, 34 +# CHECK-BE: xvbf16ger2pp 1, 33, 34 # encoding: [0xec,0x81,0x11,0x96] +# CHECK-LE: xvbf16ger2pp 1, 33, 34 # encoding: [0x96,0x11,0x81,0xec] + xvbf16ger2pp 1, 33, 34 +# CHECK-BE: xvbf16ger2pn 2, 33, 34 # encoding: [0xed,0x01,0x15,0x96] +# CHECK-LE: xvbf16ger2pn 2, 33, 34 # encoding: [0x96,0x15,0x01,0xed] + xvbf16ger2pn 2, 33, 34 +# CHECK-BE: xvbf16ger2np 1, 33, 34 # encoding: [0xec,0x81,0x13,0x96] +# CHECK-LE: xvbf16ger2np 1, 33, 34 # encoding: [0x96,0x13,0x81,0xec] + xvbf16ger2np 1, 33, 34 +# CHECK-BE: xvbf16ger2nn 2, 33, 34 # encoding: [0xed,0x01,0x17,0x96] +# CHECK-LE: xvbf16ger2nn 2, 33, 34 # encoding: [0x96,0x17,0x01,0xed] + xvbf16ger2nn 2, 33, 34 +# CHECK-BE: pmxvbf16ger2 2, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xed,0x01,0x11,0x9e] +# CHECK-LE: pmxvbf16ger2 2, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x9e,0x11,0x01,0xed] + pmxvbf16ger2 2, 33, 34, 4, 4, 2 +# CHECK-BE: pmxvbf16ger2pp 1, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x81,0x11,0x96] +# CHECK-LE: pmxvbf16ger2pp 1, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x96,0x11,0x81,0xec] + pmxvbf16ger2pp 1, 33, 34, 4, 4, 2 +# CHECK-BE: pmxvbf16ger2pn 2, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xed,0x01,0x15,0x96] +# CHECK-LE: pmxvbf16ger2pn 2, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x96,0x15,0x01,0xed] + pmxvbf16ger2pn 2, 33, 34, 4, 4, 2 +# CHECK-BE: pmxvbf16ger2np 1, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x81,0x13,0x96] +# CHECK-LE: pmxvbf16ger2np 1, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x96,0x13,0x81,0xec] + pmxvbf16ger2np 1, 33, 34, 4, 4, 2 +# CHECK-BE: pmxvbf16ger2nn 2, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xed,0x01,0x17,0x96] +# CHECK-LE: pmxvbf16ger2nn 2, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x96,0x17,0x01,0xed] + pmxvbf16ger2nn 2, 33, 34, 4, 4, 2 +# CHECK-BE: xvi8ger4spp 1, 33, 34 # encoding: [0xec,0x81,0x13,0x1e] +# CHECK-LE: xvi8ger4spp 1, 33, 34 # encoding: [0x1e,0x13,0x81,0xec] + xvi8ger4spp 1, 33, 34 +# CHECK-BE: xvi16ger2 1, 33, 34 # encoding: [0xec,0x81,0x12,0x5e] +# CHECK-LE: xvi16ger2 1, 33, 34 # encoding: [0x5e,0x12,0x81,0xec] + xvi16ger2 1, 33, 34 +# CHECK-BE: xvi16ger2pp 1, 33, 34 # encoding: [0xec,0x81,0x13,0x5e] +# CHECK-LE: xvi16ger2pp 1, 33, 34 # encoding: [0x5e,0x13,0x81,0xec] + xvi16ger2pp 1, 33, 34 +# CHECK-BE: pmxvi8ger4spp 1, 33, 34, 4, 4, 8 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x81,0x13,0x1e] +# CHECK-LE: pmxvi8ger4spp 1, 33, 34, 4, 4, 8 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x1e,0x13,0x81,0xec] + pmxvi8ger4spp 1, 33, 34, 4, 4, 8 +# CHECK-BE: pmxvi16ger2 1, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x81,0x12,0x5e] +# CHECK-LE: pmxvi16ger2 1, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x5e,0x12,0x81,0xec] + pmxvi16ger2 1, 33, 34, 4, 4, 2 +# CHECK-BE: pmxvi16ger2pp 1, 33, 34, 4, 4, 2 # encoding: [0x07,0x90,0x80,0x44, +# CHECK-BE-SAME: 0xec,0x81,0x13,0x5e] +# CHECK-LE: pmxvi16ger2pp 1, 33, 34, 4, 4, 2 # encoding: [0x44,0x80,0x90,0x07, +# CHECK-LE-SAME: 0x5e,0x13,0x81,0xec] + pmxvi16ger2pp 1, 33, 34, 4, 4, 2 # CHECK-BE: lxvp 2, 32(4) # encoding: [0x18,0x44,0x00,0x20] # CHECK-LE: lxvp 2, 32(4) # encoding: [0x20,0x00,0x44,0x18] lxvp 2, 32(4) From b23916504a1a9f29c7519ed83813774eecce1789 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Sep 2020 13:55:01 -0700 Subject: [PATCH 219/544] Patch IEEEFloat::isSignificandAllZeros and IEEEFloat::isSignificandAllOnes (bug 34579) Patch IEEEFloat::isSignificandAllZeros and IEEEFloat::isSignificandAllOnes to behave correctly in the case that the size of the significand is a multiple of the width of the integerParts making up the significand. The patch to IEEEFloat::isSignificandAllOnes fixes bug 34579, and the patch to IEEE:Float:isSignificandAllZeros fixes the unit test "APFloatTest.x87Next" I added here. I have included both in this diff since the changes are very similar. Patch by Andrew Briand --- llvm/lib/Support/APFloat.cpp | 4 ++-- llvm/unittests/ADT/APFloatTest.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index adc6299662b24..c5adbe9cf746a 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -842,7 +842,7 @@ bool IEEEFloat::isSignificandAllOnes() const { // Test if the significand excluding the integral bit is all ones. This allows // us to test for binade boundaries. const integerPart *Parts = significandParts(); - const unsigned PartCount = partCount(); + const unsigned PartCount = partCountForBits(semantics->precision); for (unsigned i = 0; i < PartCount - 1; i++) if (~Parts[i]) return false; @@ -864,7 +864,7 @@ bool IEEEFloat::isSignificandAllZeros() const { // Test if the significand excluding the integral bit is all zeros. This // allows us to test for binade boundaries. const integerPart *Parts = significandParts(); - const unsigned PartCount = partCount(); + const unsigned PartCount = partCountForBits(semantics->precision); for (unsigned i = 0; i < PartCount - 1; i++) if (Parts[i]) diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 4cd027d242301..475ad83e2d9d1 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -4696,4 +4696,15 @@ TEST(APFloatTest, PPCDoubleDoubleFrexp) { EXPECT_EQ(0x3fe8000000000000ull, Result.bitcastToAPInt().getRawData()[0]); EXPECT_EQ(0x3c98000000000000ull, Result.bitcastToAPInt().getRawData()[1]); } + +TEST(APFloatTest, x87Largest) { + APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended()); + EXPECT_TRUE(MaxX87Val.isLargest()); +} + +TEST(APFloatTest, x87Next) { + APFloat F(APFloat::x87DoubleExtended(), "-1.0"); + F.next(false); + EXPECT_TRUE(ilogb(F) == -1); +} } From 23419bfd1c8f26617bda47e6d4732dcbfe0c09a3 Mon Sep 17 00:00:00 2001 From: Joachim Protze Date: Thu, 1 Oct 2020 01:01:09 +0200 Subject: [PATCH 220/544] [OpenMP][libarcher] Allow all possible argument separators in TSAN_OPTIONS Currently, the parser used to tokenize the TSAN_OPTIONS in libomp uses only spaces as separators, even though TSAN in compiler-rt supports other separators like ':' or ','. CTest uses ':' to separate sanitizer options by default. The documentation for other sanitizers mentions ':' as separator, but TSAN only lists spaces, which is probably where this mismatch originated. Patch provided by upsj Differential Revision: https://reviews.llvm.org/D87144 --- openmp/tools/archer/ompt-tsan.cpp | 27 ++++++++----- openmp/tools/archer/tests/lit.cfg | 6 ++- .../tests/parallel/parallel-nosuppression.c | 40 +++++++++++++++++++ .../archer/tests/parallel/parallel-simple.c | 1 + 4 files changed, 64 insertions(+), 10 deletions(-) create mode 100644 openmp/tools/archer/tests/parallel/parallel-nosuppression.c diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp index d83cf04638d12..a288a2296a5eb 100644 --- a/openmp/tools/archer/ompt-tsan.cpp +++ b/openmp/tools/archer/ompt-tsan.cpp @@ -15,18 +15,18 @@ #define __STDC_FORMAT_MACROS #endif +#include #include #include #include #include #include #include +#include #include #include #include -#include #include -#include #include #include @@ -89,17 +89,26 @@ class TsanFlags { TsanFlags(const char *env) : ignore_noninstrumented_modules(0) { if (env) { std::vector tokens; - std::string token; std::string str(env); - std::istringstream iss(str); - while (std::getline(iss, token, ' ')) - tokens.push_back(token); + auto end = str.end(); + auto it = str.begin(); + auto is_sep = [](char c) { + return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' || + c == '\r'; + }; + while (it != end) { + auto next_it = std::find_if(it, end, is_sep); + tokens.emplace_back(it, next_it); + it = next_it; + if (it != end) { + ++it; + } + } - for (std::vector::iterator it = tokens.begin(); - it != tokens.end(); ++it) { + for (const auto &token : tokens) { // we are interested in ignore_noninstrumented_modules to print a // warning - if (sscanf(it->c_str(), "ignore_noninstrumented_modules=%d", + if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d", &ignore_noninstrumented_modules)) continue; } diff --git a/openmp/tools/archer/tests/lit.cfg b/openmp/tools/archer/tests/lit.cfg index ed4ec4d03b69c..f064127817d62 100644 --- a/openmp/tools/archer/tests/lit.cfg +++ b/openmp/tools/archer/tests/lit.cfg @@ -93,6 +93,8 @@ if 'INTEL_LICENSE_FILE' in os.environ: # Race Tests config.substitutions.append(("%libarcher-compile-and-run-race", \ "%libarcher-compile && %libarcher-run-race")) +config.substitutions.append(("%libarcher-compile-and-run-nosuppression", \ + "%libarcher-compile && %libarcher-run-nosuppression")) config.substitutions.append(("%libarcher-compile-and-run", \ "%libarcher-compile && %libarcher-run")) config.substitutions.append(("%libarcher-cxx-compile-and-run", \ @@ -102,13 +104,15 @@ config.substitutions.append(("%libarcher-cxx-compile", \ config.substitutions.append(("%libarcher-compile", \ "%clang-archer %openmp_flags %archer_flags %flags %s -o %t" + libs)) config.substitutions.append(("%libarcher-run-race", "%suppression %deflake %t 2>&1 | tee %t.log")) +config.substitutions.append(("%libarcher-run-nosuppression", "%nosuppression %t 2>&1 | tee %t.log")) config.substitutions.append(("%libarcher-run", "%suppression %t 2>&1 | tee %t.log")) config.substitutions.append(("%clang-archerXX", config.test_cxx_compiler)) config.substitutions.append(("%clang-archer", config.test_c_compiler)) config.substitutions.append(("%openmp_flags", config.test_openmp_flags)) config.substitutions.append(("%archer_flags", config.archer_flags)) config.substitutions.append(("%flags", config.test_flags)) -config.substitutions.append(("%suppression", "env TSAN_OPTIONS='ignore_noninstrumented_modules=1'")) +config.substitutions.append(("%nosuppression", "env TSAN_OPTIONS='ignore_noninstrumented_modules=0'")) +config.substitutions.append(("%suppression", "env TSAN_OPTIONS='ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1'")) config.substitutions.append(("%deflake", os.path.join(os.path.dirname(__file__), "deflake.bash"))) config.substitutions.append(("FileCheck", config.test_filecheck)) diff --git a/openmp/tools/archer/tests/parallel/parallel-nosuppression.c b/openmp/tools/archer/tests/parallel/parallel-nosuppression.c new file mode 100644 index 0000000000000..f0e1cd8b5e468 --- /dev/null +++ b/openmp/tools/archer/tests/parallel/parallel-nosuppression.c @@ -0,0 +1,40 @@ +/* + * parallel-nosuppression.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +// RUN: %libarcher-compile-and-run-nosuppression | FileCheck %s +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var = 0; + +#pragma omp parallel num_threads(2) shared(var) + { + if (omp_get_thread_num() == 1) { + var++; + } + } // implicit barrier + + var++; + + fprintf(stderr, "DONE\n"); + int error = (var != 2); + return error; +} + +// CHECK-NOT: ThreadSanitizer: data race +// CHECK-NOT: ThreadSanitizer: reported +// CHECK: Warning: please export TSAN_OPTIONS +// CHECK: DONE diff --git a/openmp/tools/archer/tests/parallel/parallel-simple.c b/openmp/tools/archer/tests/parallel/parallel-simple.c index 86f0b5342d8a8..5c70ba601b506 100644 --- a/openmp/tools/archer/tests/parallel/parallel-simple.c +++ b/openmp/tools/archer/tests/parallel/parallel-simple.c @@ -36,4 +36,5 @@ int main(int argc, char *argv[]) { // CHECK-NOT: ThreadSanitizer: data race // CHECK-NOT: ThreadSanitizer: reported +// CHECK-NOT: Warning: please export TSAN_OPTIONS // CHECK: DONE From e4f50e587f077c246b7f29db0b7daddf583e2b64 Mon Sep 17 00:00:00 2001 From: Ranjeet Singh Date: Thu, 1 Oct 2020 00:30:36 +0100 Subject: [PATCH 221/544] [ARM] Add missing target for Arm neon test case. This is a follow-up from https://reviews.llvm.org/D61717. Where Richard described the issue with compiling arm_neon.h under -flax-vector-conversions=none. It looks like the example reproducer does actually work but what was missing was a test entry for that target. Differential Revision: https://reviews.llvm.org/D88546 --- clang/test/Headers/arm-neon-header.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Headers/arm-neon-header.c b/clang/test/Headers/arm-neon-header.c index f6362886010a5..8f64633b44d56 100644 --- a/clang/test/Headers/arm-neon-header.c +++ b/clang/test/Headers/arm-neon-header.c @@ -22,5 +22,6 @@ // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16fml+crypto+dotprod -std=c11 -xc -flax-vector-conversions=none %s // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16fml+crypto+dotprod -std=c11 -xc -flax-vector-conversions=none %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=arm64-linux-gnu -arch +neon -std=c11 -xc -flax-vector-conversions=none %s #include From bc43ddf42fff5a43f23354e25a32aca19541fec5 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 29 Sep 2020 18:23:02 -0700 Subject: [PATCH 222/544] [AArch64][GlobalISel] NFC: Refactor G_FCMP selection code Refactor this so it's similar to the existing integer comparison code. Also add some missing 64-bit testcases to select-fcmp.mir. Refactoring to prep for improving selection for G_FCMP-related conditional branches etc. Differential Revision: https://reviews.llvm.org/D88614 --- .../GISel/AArch64InstructionSelector.cpp | 170 +++++++++--------- .../AArch64/GlobalISel/select-fcmp.mir | 53 ++++++ 2 files changed, 139 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 53875db57c0dc..658ff94af2dc6 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -172,6 +172,11 @@ class AArch64InstructionSelector : public InstructionSelector { emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + + /// Emit a floating point comparison between \p LHS and \p RHS. + MachineInstr *emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitInstr(unsigned Opcode, std::initializer_list DstOps, std::initializer_list SrcOps, @@ -238,9 +243,16 @@ class AArch64InstructionSelector : public InstructionSelector { MachineInstr *emitFMovForFConstant(MachineInstr &MI, MachineRegisterInfo &MRI) const; - /// Emit a CSet for a compare. + /// Emit a CSet for an integer compare. + /// + /// \p DefReg is expected to be a 32-bit scalar register. MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const; + /// Emit a CSet for a FP compare. + /// + /// \p Dst is expected to be a 32-bit scalar register. + MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, + MachineIRBuilder &MIRBuilder) const; /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. /// \p IsNegative is true if the test should be "not zero". @@ -998,20 +1010,6 @@ static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI, return 0; } -/// Helper function to select the opcode for a G_FCMP. -static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) { - // If this is a compare against +0.0, then we don't have to explicitly - // materialize a constant. - const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI); - bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); - unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); - if (OpSize != 32 && OpSize != 64) - return 0; - unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, - {AArch64::FCMPSri, AArch64::FCMPDri}}; - return CmpOpcTbl[ShouldUseImm][OpSize == 64]; -} - /// Returns true if \p P is an unsigned integer comparison predicate. static bool isUnsignedICMPPred(const CmpInst::Predicate P) { switch (P) { @@ -2882,64 +2880,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } case TargetOpcode::G_FCMP: { - if (Ty != LLT::scalar(32)) { - LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty - << ", expected: " << LLT::scalar(32) << '\n'); - return false; - } - - unsigned CmpOpc = selectFCMPOpc(I, MRI); - if (!CmpOpc) + MachineIRBuilder MIRBuilder(I); + CmpInst::Predicate Pred = + static_cast(I.getOperand(1).getPredicate()); + if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), + MIRBuilder) || + !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) return false; - - // FIXME: regbank - - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC( - (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2); - - // Partially build the compare. Decide if we need to add a use for the - // third operand based off whether or not we're comparing against 0.0. - auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) - .addUse(I.getOperand(2).getReg()); - - // If we don't have an immediate compare, then we need to add a use of the - // register which wasn't used for the immediate. - // Note that the immediate will always be the last operand. - if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) - CmpMI = CmpMI.addUse(I.getOperand(3).getReg()); - - const Register DefReg = I.getOperand(0).getReg(); - Register Def1Reg = DefReg; - if (CC2 != AArch64CC::AL) - Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - - MachineInstr &CSetMI = - *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) - .addDef(Def1Reg) - .addUse(AArch64::WZR) - .addUse(AArch64::WZR) - .addImm(getInvertedCondCode(CC1)); - - if (CC2 != AArch64CC::AL) { - Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MachineInstr &CSet2MI = - *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) - .addDef(Def2Reg) - .addUse(AArch64::WZR) - .addUse(AArch64::WZR) - .addImm(getInvertedCondCode(CC2)); - MachineInstr &OrMI = - *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) - .addDef(DefReg) - .addUse(Def1Reg) - .addUse(Def2Reg); - constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); - constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); - } - constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); - constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); - I.eraseFromParent(); return true; } @@ -3984,6 +3931,66 @@ AArch64InstructionSelector::emitIntegerCompare( return {&*CmpMI, P}; } +MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( + Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); +#ifndef NDEBUG + LLT Ty = MRI.getType(Dst); + assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && + "Expected a 32-bit scalar register?"); +#endif + const Register ZeroReg = AArch64::WZR; + auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { + auto CSet = + MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) + .addImm(getInvertedCondCode(CC)); + constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); + return &*CSet; + }; + + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(Pred, CC1, CC2); + if (CC2 == AArch64CC::AL) + return EmitCSet(Dst, CC1); + + const TargetRegisterClass *RC = &AArch64::GPR32RegClass; + Register Def1Reg = MRI.createVirtualRegister(RC); + Register Def2Reg = MRI.createVirtualRegister(RC); + EmitCSet(Def1Reg, CC1); + EmitCSet(Def2Reg, CC2); + auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); + constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); + return &*OrMI; +} + +MachineInstr * +AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + LLT Ty = MRI.getType(LHS); + if (Ty.isVector()) + return nullptr; + unsigned OpSize = Ty.getSizeInBits(); + if (OpSize != 32 && OpSize != 64) + return nullptr; + + // If this is a compare against +0.0, then we don't have + // to explicitly materialize a constant. + const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); + bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); + unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, + {AArch64::FCMPSri, AArch64::FCMPDri}}; + unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; + + // Partially build the compare. Decide if we need to add a use for the + // third operand based off whether or not we're comparing against 0.0. + auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); + if (!ShouldUseImm) + CmpMI.addUse(RHS); + constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); + return &*CmpMI; +} + MachineInstr *AArch64InstructionSelector::emitVectorConcat( Optional Dst, Register Op1, Register Op2, MachineIRBuilder &MIRBuilder) const { @@ -4169,10 +4176,10 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { CondCode = changeICMPPredToAArch64CC(Pred); } else { // Get the condition code for the select. + CmpInst::Predicate Pred = + static_cast(CondDef->getOperand(1).getPredicate()); AArch64CC::CondCode CondCode2; - changeFCMPPredToAArch64CC( - (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode, - CondCode2); + changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two // instructions to emit the comparison. @@ -4181,16 +4188,11 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { if (CondCode2 != AArch64CC::AL) return false; - // Make sure we'll be able to select the compare. - unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI); - if (!CmpOpc) + if (!emitFPCompare(CondDef->getOperand(2).getReg(), + CondDef->getOperand(3).getReg(), MIB)) { + LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); return false; - - // Emit a new compare. - auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()}); - if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) - Cmp.addUse(CondDef->getOperand(3).getReg()); - constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); + } } // Emit the select. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir index b366c0dea2670..45799079f9200 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir @@ -54,3 +54,56 @@ body: | %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2 $s0 = COPY %3(s32) RET_ReallyLR implicit $s0 + +... +--- +name: notzero_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: notzero_s64 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 112 + ; CHECK: FCMPDrr [[COPY]], [[FMOVDi]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $s0 = COPY [[CSINCWr]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(s64) = COPY $d0 + %1:fpr(s64) = COPY $d1 + %2:fpr(s64) = G_FCONSTANT double 1.000000e+00 + %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2 + $s0 = COPY %3(s32) + RET_ReallyLR implicit $s0 + + +... +--- +name: zero_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1, $s0 + + ; CHECK-LABEL: name: zero_s64 + ; CHECK: liveins: $d0, $d1, $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $s0 = COPY [[CSINCWr]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(s64) = COPY $d0 + %1:fpr(s64) = COPY $d1 + %2:fpr(s64) = G_FCONSTANT double 0.000000e+00 + %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2 + $s0 = COPY %3(s32) + RET_ReallyLR implicit $s0 From d689570d7dcb16ee241676e22324dc456837eb23 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 30 Sep 2020 17:01:27 -0700 Subject: [PATCH 223/544] [lldb] Make TestGuiBasicDebug more lenient Matt's change to the register allocator in 89baeaef2fa9 changed where we end up after the `finish`. Before we'd end up on line 4. * thread #1, queue = 'com.apple.main-thread', stop reason = step out Return value: (int) $0 = 1 frame #0: 0x0000000100003f7d a.out`main(argc=1, argv=0x00007ffeefbff630) at main.c:4:3 1 extern int func(); 2 3 int main(int argc, char **argv) { -> 4 func(); // Break here 5 func(); // Second 6 return 0; 7 } Now, we end up on line 5. * thread #1, queue = 'com.apple.main-thread', stop reason = step out Return value: (int) $0 = 1 frame #0: 0x0000000100003f8d a.out`main(argc=1, argv=0x00007ffeefbff630) at main.c:5:3 2 3 int main(int argc, char **argv) { 4 func(); // Break here -> 5 func(); // Second 6 return 0; 7 } Given that this is not expected stable to be stable I've made the test a bit more lenient to accept both scenarios. --- lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py index ed5daf57a4441..81067bf776e39 100644 --- a/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py +++ b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py @@ -37,11 +37,11 @@ def test_gui(self): self.child.send("d") # down self.child.expect_exact("return 1; // In function") self.child.send("f") # finish - self.child.expect("func\(\); // Break here[^\r\n]+<<< Thread 1: step out") + self.child.expect("<<< Thread 1: step out") self.child.send("s") # move onto the second one - self.child.expect("func\(\); // Second[^\r\n]+<<< Thread 1: step in") + self.child.expect("<<< Thread 1: step in") self.child.send("n") # step over - self.child.expect("return 0;[^\r\n]+<<< Thread 1: step over") + self.child.expect("<<< Thread 1: step over") # Press escape to quit the gui self.child.send(escape_key) From e24f0ac7a389fcb5c2f5295e717d9f7d3fcd4cea Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 12:53:00 -0700 Subject: [PATCH 224/544] [flang] Allow record advancement in external formatted sequential READ The '/' control edit descriptor causes a runtime crash for an external formatted sequential READ because the AdvanceRecord() member function for external units implemented only the tasks to finish reading the current record. Split those out into a new FinishReadingRecord() member function, call that instead from EndIoStatement(), and change AdvanceRecord() to both finish reading the current record and to begin reading the next one. Differential revision: https://reviews.llvm.org/D88607 --- flang/runtime/io-stmt.cpp | 30 ++++++++++++++------- flang/runtime/io-stmt.h | 7 +++-- flang/runtime/unit.cpp | 55 ++++++++++++++++++++++++--------------- flang/runtime/unit.h | 2 ++ 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 9bf0284358b96..2a7d552dacd8b 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -236,11 +236,13 @@ int NoUnitIoStatementState::EndIoStatement() { template int ExternalIoStatementState

::EndIoStatement() { if constexpr (DIR == Direction::Input) { BeginReadingRecord(); // in case of READ with no data items - } - if (!unit().nonAdvancing && GetIoStat() != IostatEnd) { - unit().AdvanceRecord(*this); - } - if constexpr (DIR == Direction::Output) { + if (!unit().nonAdvancing) { + FinishReadingRecord(); + } + } else { + if (!unit().nonAdvancing) { + unit().AdvanceRecord(*this); + } unit().FlushIfTerminal(*this); } return ExternalIoStatementBase::EndIoStatement(); @@ -315,10 +317,20 @@ void ExternalIoStatementState::HandleRelativePosition(std::int64_t n) { template void ExternalIoStatementState::BeginReadingRecord() { if constexpr (DIR == Direction::Input) { - if (!beganReading_) { - beganReading_ = true; - unit().BeginReadingRecord(*this); - } + unit().BeginReadingRecord(*this); + } else { + Crash("ExternalIoStatementState::BeginReadingRecord() " + "called"); + } +} + +template +void ExternalIoStatementState::FinishReadingRecord() { + if constexpr (DIR == Direction::Input) { + unit().FinishReadingRecord(*this); + } else { + Crash("ExternalIoStatementState::FinishReadingRecord() " + "called"); } } diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 9e68deab2e641..3c82dc8b1b0a0 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -65,6 +65,7 @@ class IoStatementState { ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit MutableModes &mutableModes(); void BeginReadingRecord(); + void FinishReadingRecord(); bool Inquire(InquiryKeywordHash, char *, std::size_t); bool Inquire(InquiryKeywordHash, bool &); bool Inquire(InquiryKeywordHash, std::int64_t, bool &); // PENDING= @@ -123,7 +124,7 @@ struct IoStatementBase : public DefaultFormatControlCallbacks { std::optional GetNextDataEdit(IoStatementState &, int = 1); ExternalFileUnit *GetExternalFileUnit() const { return nullptr; } void BeginReadingRecord() {} - + void FinishReadingRecord() {} bool Inquire(InquiryKeywordHash, char *, std::size_t); bool Inquire(InquiryKeywordHash, bool &); bool Inquire(InquiryKeywordHash, std::int64_t, bool &); @@ -269,9 +270,7 @@ class ExternalIoStatementState : public ExternalIoStatementBase, void HandleRelativePosition(std::int64_t); void HandleAbsolutePosition(std::int64_t); void BeginReadingRecord(); - -private: - bool beganReading_{false}; + void FinishReadingRecord(); }; template diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index be36666f66e46..8170fbc696c21 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -349,6 +349,10 @@ void ExternalFileUnit::SetLeftTabLimit() { void ExternalFileUnit::BeginReadingRecord(IoErrorHandler &handler) { RUNTIME_CHECK(handler, direction_ == Direction::Input); + if (beganReadingRecord_) { + return; + } + beganReadingRecord_ = true; if (access == Access::Sequential) { if (endfileRecordNumber && currentRecordNumber >= *endfileRecordNumber) { handler.SignalEnd(); @@ -367,28 +371,37 @@ void ExternalFileUnit::BeginReadingRecord(IoErrorHandler &handler) { } } +void ExternalFileUnit::FinishReadingRecord(IoErrorHandler &handler) { + RUNTIME_CHECK(handler, direction_ == Direction::Input && beganReadingRecord_); + beganReadingRecord_ = false; + if (access == Access::Sequential) { + RUNTIME_CHECK(handler, recordLength.has_value()); + if (isFixedRecordLength) { + frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; + recordOffsetInFrame_ = 0; + } else if (isUnformatted) { + // Retain footer in frame for more efficient BACKSPACE + frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; + recordOffsetInFrame_ = sizeof(std::uint32_t); + recordLength.reset(); + } else { // formatted + if (Frame()[recordOffsetInFrame_ + *recordLength] == '\r') { + ++recordOffsetInFrame_; + } + recordOffsetInFrame_ += *recordLength + 1; + RUNTIME_CHECK(handler, Frame()[recordOffsetInFrame_ - 1] == '\n'); + recordLength.reset(); + } + } + ++currentRecordNumber; + BeginRecord(); +} + bool ExternalFileUnit::AdvanceRecord(IoErrorHandler &handler) { bool ok{true}; if (direction_ == Direction::Input) { - if (access == Access::Sequential) { - RUNTIME_CHECK(handler, recordLength.has_value()); - if (isFixedRecordLength) { - frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; - recordOffsetInFrame_ = 0; - } else if (isUnformatted) { - // Retain footer in frame for more efficient BACKSPACE - frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; - recordOffsetInFrame_ = sizeof(std::uint32_t); - recordLength.reset(); - } else { // formatted - if (Frame()[recordOffsetInFrame_ + *recordLength] == '\r') { - ++recordOffsetInFrame_; - } - recordOffsetInFrame_ += *recordLength + 1; - RUNTIME_CHECK(handler, Frame()[recordOffsetInFrame_ - 1] == '\n'); - recordLength.reset(); - } - } + FinishReadingRecord(handler); + BeginReadingRecord(handler); } else { // Direction::Output if (!isUnformatted) { if (isFixedRecordLength && recordLength) { @@ -406,9 +419,9 @@ bool ExternalFileUnit::AdvanceRecord(IoErrorHandler &handler) { recordOffsetInFrame_ + recordLength.value_or(furthestPositionInRecord); recordOffsetInFrame_ = 0; impliedEndfile_ = true; + ++currentRecordNumber; + BeginRecord(); } - ++currentRecordNumber; - BeginRecord(); return ok; } diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index 9d66d962bc56d..644ca4ad63cc1 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -78,6 +78,7 @@ class ExternalFileUnit : public ConnectionState, std::optional GetCurrentChar(IoErrorHandler &); void SetLeftTabLimit(); void BeginReadingRecord(IoErrorHandler &); + void FinishReadingRecord(IoErrorHandler &); bool AdvanceRecord(IoErrorHandler &); void BackspaceRecord(IoErrorHandler &); void FlushIfTerminal(IoErrorHandler &); @@ -105,6 +106,7 @@ class ExternalFileUnit : public ConnectionState, int unitNumber_{-1}; Direction direction_{Direction::Output}; bool impliedEndfile_{false}; // seq. output has taken place + bool beganReadingRecord_{false}; Lock lock_; From 4ab45cc2260d87f18e1b05517d5d366b2e754b72 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 30 Sep 2020 17:20:57 -0700 Subject: [PATCH 225/544] [AArch64][GlobalISel] Add some more legal types for G_PHI, G_IMPLICIT_DEF, G_FREEZE. Also use this opportunity start to clean up the mess of vector type lists we have in the LegalizerInfo. Unfortunately since the legalizer rule builders require std::initializer_list objects as parameters we can't programmatically generate the type lists. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 13 +++- .../AArch64/GlobalISel/legalize-freeze.mir | 20 ++++- .../AArch64/GlobalISel/legalize-phi.mir | 76 +++++++------------ 3 files changed, 57 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 7d013c4398832..206e409992240 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -54,6 +54,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); + const auto PackedVectorAllTypeList = {/* Begin 128bit types */ + v16s8, v8s16, v4s32, v2s64, v2p0, + /* End 128bit types */ + /* Begin 64bit types */ + v8s8, v4s16, v2s32}; + const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); // FIXME: support subtargets which have neon/fp-armv8 disabled. @@ -63,7 +69,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) } getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64, v16s8, v8s16}) + .legalFor({p0, s1, s8, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) .clampScalar(0, s1, s64) .widenScalarToNextPow2(0, 8) .fewerElementsIf( @@ -79,8 +86,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return std::make_pair(0, EltTy); }); - getActionDefinitionsBuilder(G_PHI) - .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64}) + getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) .clampScalar(0, s16, s64) .widenScalarToNextPow2(0); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir index 9417df066a46b..f6c15ec4925d7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=aarch64 -run-pass=legalizer -O0 %s -o - | FileCheck %s +# RUN: llc -march=aarch64 -run-pass=legalizer -global-isel-abort=1 -O0 %s -o - | FileCheck %s --- name: test_freeze_s64 body: | @@ -67,3 +67,21 @@ body: | $w0 = COPY %1 $w1 = COPY %2 ... +--- +name: test_freeze_v8s8 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_freeze_v8s8 + ; CHECK: %d0:_(<8 x s8>) = COPY $d0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<8 x s8>) = G_FREEZE %d0 + ; CHECK: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[FREEZE]](<8 x s8>) + ; CHECK: $w0 = COPY [[UV]](<4 x s8>) + ; CHECK: $w1 = COPY [[UV1]](<4 x s8>) + %d0:_(<8 x s8>) = COPY $d0 + %0:_(<8 x s8>) = G_FREEZE %d0 + %1:_(<4 x s8>), %2:_(<4 x s8>) = G_UNMERGE_VALUES %0 + $w0 = COPY %1 + $w1 = COPY %2 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index c909b27b83ccf..b9fbd17c07dae 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -1,51 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s ---- | - ; ModuleID = '/tmp/test.ll' - source_filename = "/tmp/test.ll" - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64-unknown-unknown" - - define i32 @legalize_phi(i32 %argc) { - entry: - ret i32 0 - } - - define i64* @legalize_phi_ptr(i64* %a, i64* %b, i1 %cond) { - entry: - ret i64* null - } - - define i32 @legalize_phi_empty(i32 %argc) { - entry: - ret i32 0 - } - - define i32 @legalize_phi_loop(i32 %argc) { - entry: - ret i32 0 - } - - define i32 @legalize_phi_cycle(i32 %argc) { - entry: - ret i32 0 - } - define i32 @legalize_phi_same_bb(i32 %argc) { - entry: - ret i32 0 - } - - define i32 @legalize_phi_diff_bb(i32 %argc, i32 %argc2) { - entry: - ret i32 0 - } - - define i32 @legalize_phi_check_insertpt(i64 %a) { - entry: - ret i32 0 - } - -... +# RUN: llc -O0 -mtriple=aarch64-unknown-unknown -verify-machineinstrs -global-isel-abort=1 -run-pass=legalizer %s -o - | FileCheck %s --- name: legalize_phi alignment: 4 @@ -610,7 +564,6 @@ regBankSelected: false selected: false tracksRegLiveness: true body: | - ; Check that the G_MERGE here gets inserted after all the PHIs. ; CHECK-LABEL: name: legalize_phi_check_insertpt ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) @@ -627,6 +580,7 @@ body: | ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store 16) ; CHECK: G_STORE [[PHI2]](s64), [[COPY1]](p0) :: (store 8) ; CHECK: RET_ReallyLR + ; Check that the G_MERGE here gets inserted after all the PHIs. bb.0: successors: %bb.1(0x40000000) liveins: $x0, $x1 @@ -644,3 +598,29 @@ body: | RET_ReallyLR ... +--- +name: legalize_phi_vector +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: legalize_phi_vector + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $q0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s8>) = G_PHI [[COPY]](<16 x s8>), %bb.0 + ; CHECK: $q0 = COPY [[PHI]](<16 x s8>) + ; CHECK: RET_ReallyLR + bb.0: + successors: %bb.1 + liveins: $q0, $x1 + + %0:_(<16 x s8>) = COPY $q0 + G_BR %bb.1 + + bb.1: + %3:_(<16 x s8>) = G_PHI %0(<16 x s8>), %bb.0 + $q0 = COPY %3(<16 x s8>) + RET_ReallyLR +... From 460dda071e091df3b5584f21954c9209e7334c50 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 30 Sep 2020 09:22:18 -0700 Subject: [PATCH 226/544] [WholeProgramDevirt][NewPM] Add NPM testing path to match legacy pass The legacy pass's default constructor sets UseCommandLine = true and goes down a separate testing route. Match that in the NPM pass. This fixes all tests in llvm/test/Transforms/WholeProgramDevirt under NPM. Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D88588 --- llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h | 3 +++ llvm/lib/Passes/PassRegistry.def | 2 +- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 5 +++++ llvm/test/Transforms/WholeProgramDevirt/import.ll | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 86e28cfead80e..6e92f8fd3f0d2 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -223,6 +223,9 @@ void setAfterReturnValues(MutableArrayRef Targets, struct WholeProgramDevirtPass : public PassInfoMixin { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool UseCommandLine = false; + WholeProgramDevirtPass() + : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index add685dbdacc2..0111fc494c43c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -97,7 +97,7 @@ MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("strip-debug-declare", StripDebugDeclarePass()) MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) -MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) +MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass()) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("dfsan", DataFlowSanitizerPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 6baeaf48b4901..e97f1acbb3962 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -753,6 +753,11 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { return FAM.getResult(F); }; + if (UseCommandLine) { + if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree)) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); + } if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary, ImportSummary) .run()) diff --git a/llvm/test/Transforms/WholeProgramDevirt/import.ll b/llvm/test/Transforms/WholeProgramDevirt/import.ll index 8beb27db26fe0..d1ddacacfaced 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/import.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/import.ll @@ -1,3 +1,4 @@ +; RUN: opt -S -passes=wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-single-impl.yaml < %s | FileCheck --check-prefixes=CHECK,SINGLE-IMPL %s ; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-single-impl.yaml < %s | FileCheck --check-prefixes=CHECK,SINGLE-IMPL %s ; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-uniform-ret-val.yaml < %s | FileCheck --check-prefixes=CHECK,INDIR,UNIFORM-RET-VAL %s ; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-unique-ret-val0.yaml < %s | FileCheck --check-prefixes=CHECK,INDIR,UNIQUE-RET-VAL0 %s From 93a1fc2e18b452216be70f534da42f7702adbe1d Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 30 Sep 2020 17:35:53 -0700 Subject: [PATCH 227/544] Try to fix build. May have used a C++ feature too new/not supported on all platforms. --- .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 206e409992240..b6a006eba53b9 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" +#include #define DEBUG_TYPE "aarch64-legalinfo" @@ -54,11 +55,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); - const auto PackedVectorAllTypeList = {/* Begin 128bit types */ - v16s8, v8s16, v4s32, v2s64, v2p0, - /* End 128bit types */ - /* Begin 64bit types */ - v8s8, v4s16, v2s32}; + std::initializer_list PackedVectorAllTypeList = {/* Begin 128bit types */ + v16s8, v8s16, v4s32, + v2s64, v2p0, + /* End 128bit types */ + /* Begin 64bit types */ + v8s8, v4s16, v2s32}; const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); From 3c45a06f26edfb7e94003adf58cb8951ea9c2ce6 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 30 Jul 2020 17:44:32 -0700 Subject: [PATCH 228/544] [lld][WebAssembly] Allow exporting of mutable globals In particular allow explict exporting of `__stack_pointer` but exclud this from `--export-all` to avoid requiring the mutable globals feature whenenve `--export-all` is used. This uncovered a bug in populateTargetFeatures regarding checking if the mutable-globals feature is allowed. See: https://github.com/WebAssembly/binaryen/issues/2934 Differential Revision: https://reviews.llvm.org/D88506 --- lld/docs/WebAssembly.rst | 4 ++ lld/test/wasm/mutable-global-exports.s | 88 ++++++++++++++++++++++++++ lld/test/wasm/mutable-globals.s | 1 + lld/wasm/Writer.cpp | 15 +++-- 4 files changed, 101 insertions(+), 7 deletions(-) create mode 100644 lld/test/wasm/mutable-global-exports.s diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst index b23f2cd462b4b..bf1f008e608e6 100644 --- a/lld/docs/WebAssembly.rst +++ b/lld/docs/WebAssembly.rst @@ -39,6 +39,10 @@ WebAssembly-specific options: Export all symbols (normally combined with --no-gc-sections) + Note that this will not export linker-generated mutable globals unless + the resulting binaryen already includes the 'mutable-globals' features + since that would otherwise create and invalid binaryen. + .. option:: --export-dynamic When building an executable, export any non-hidden symbols. By default only diff --git a/lld/test/wasm/mutable-global-exports.s b/lld/test/wasm/mutable-global-exports.s new file mode 100644 index 0000000000000..e2e45ff93a4bc --- /dev/null +++ b/lld/test/wasm/mutable-global-exports.s @@ -0,0 +1,88 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# +# Should fail without mutable globals feature enabled. +# RUN: not wasm-ld --export-all %t.o -o %t.wasm 2>&1 | FileCheck -check-prefix=CHECK-ERR %s +# RUN: not wasm-ld --export=foo_global %t.o -o %t.wasm 2>&1 | FileCheck -check-prefix=CHECK-ERR %s +# +# RUN: wasm-ld --features=mutable-globals --export=foo_global %t.o -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +# Explcitly check that __stack_pointer can be exported +# RUN: wasm-ld --features=mutable-globals --export=__stack_pointer %t.o -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck -check-prefix=CHECK-SP %s + +# RUN: wasm-ld --features=mutable-globals --export-all %t.o -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck -check-prefix=CHECK-ALL %s + + +.globl _start +.globl foo_global + +.globaltype foo_global, i32 +foo_global: + +_start: + .functype _start () -> () + end_function + +# CHECK-ERR: mutable global exported but 'mutable-globals' feature not present in inputs: `foo_global`. Use --no-check-features to suppress + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: memory +# CHECK-NEXT: Kind: MEMORY +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: _start +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foo_global +# CHECK-NEXT: Kind: GLOBAL +# CHECK-NEXT: Index: 1 +# CHECK-NEXT: - Type: CODE + +# CHECK-SP: - Type: EXPORT +# CHECK-SP-NEXT: Exports: +# CHECK-SP-NEXT: - Name: memory +# CHECK-SP-NEXT: Kind: MEMORY +# CHECK-SP-NEXT: Index: 0 +# CHECK-SP-NEXT: - Name: __stack_pointer +# CHECK-SP-NEXT: Kind: GLOBAL +# CHECK-SP-NEXT: Index: 0 +# CHECK-SP-NEXT: - Name: _start +# CHECK-SP-NEXT: Kind: FUNCTION +# CHECK-SP-NEXT: Index: 0 +# CHECK-SP-NEXT: - Type: CODE + +# CHECK-ALL: - Type: EXPORT +# CHECK-ALL-NEXT: Exports: +# CHECK-ALL-NEXT: - Name: memory +# CHECK-ALL-NEXT: Kind: MEMORY +# CHECK-ALL-NEXT: Index: 0 +# CHECK-ALL-NEXT: - Name: __wasm_call_ctors +# CHECK-ALL-NEXT: Kind: FUNCTION +# CHECK-ALL-NEXT: Index: 0 +# CHECK-ALL-NEXT: - Name: _start +# CHECK-ALL-NEXT: Kind: FUNCTION +# CHECK-ALL-NEXT: Index: 1 +# CHECK-ALL-NEXT: - Name: foo_global +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 1 +# CHECK-ALL-NEXT: - Name: __dso_handle +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 2 +# CHECK-ALL-NEXT: - Name: __data_end +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 3 +# CHECK-ALL-NEXT: - Name: __global_base +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 4 +# CHECK-ALL-NEXT: - Name: __heap_base +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 5 +# CHECK-ALL-NEXT: - Name: __memory_base +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 6 +# CHECK-ALL-NEXT: - Name: __table_base +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 7 +# CHECK-ALL-NEXT: - Type: CODE diff --git a/lld/test/wasm/mutable-globals.s b/lld/test/wasm/mutable-globals.s index ea856e5112895..9e8911b02bf2e 100644 --- a/lld/test/wasm/mutable-globals.s +++ b/lld/test/wasm/mutable-globals.s @@ -1,5 +1,6 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s # RUN: not wasm-ld %t.o -o %t.wasm 2>&1 | FileCheck %s +# RUN: wasm-ld --features=mutable-globals %t.o -o %t.wasm .globl _start _start: diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 90dd96bd1c894..1d669ca7a723c 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -453,7 +453,7 @@ void Writer::populateTargetFeatures() { if (!config->checkFeatures) return; - if (!config->relocatable && used.count("mutable-globals") == 0) { + if (!config->relocatable && allowed.count("mutable-globals") == 0) { for (const Symbol *sym : out.importSec->importedSymbols) { if (auto *global = dyn_cast(sym)) { if (global->getGlobalType()->Mutable) { @@ -571,12 +571,13 @@ void Writer::calculateExports() { } export_ = {name, WASM_EXTERNAL_FUNCTION, f->getFunctionIndex()}; } else if (auto *g = dyn_cast(sym)) { - // TODO(sbc): Remove this check once to mutable global proposal is - // implement in all major browsers. - // See: https://github.com/WebAssembly/mutable-global - if (g->getGlobalType()->Mutable) { - // Only __stack_pointer and __tls_base should ever be create as mutable. - assert(g == WasmSym::stackPointer || g == WasmSym::tlsBase); + if (g->getGlobalType()->Mutable && !g->getFile() && !g->forceExport) { + // Avoid exporting mutable globals are linker synthesized (e.g. + // __stack_pointer or __tls_base) unless they are explicitly exported + // from the command line. + // Without this check `--export-all` would cause any program using the + // stack pointer to export a mutable global even if none of the input + // files were built with the `mutable-globals` feature. continue; } export_ = {name, WASM_EXTERNAL_GLOBAL, g->getGlobalIndex()}; From d4e889f1f5723105dbab12b749503d2462eb1755 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Wed, 30 Sep 2020 17:47:25 -0700 Subject: [PATCH 229/544] Remove `Ops` suffix from dialect library names Dialects include more than just ops, so this suffix is outdated. Follows discussion in https://llvm.discourse.group/t/rfc-canonical-file-paths-to-dialects/621 Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D88530 --- flang/lib/Lower/CMakeLists.txt | 2 +- mlir/docs/Tutorials/CreatingADialect.md | 51 ++++++++----------- mlir/lib/Analysis/CMakeLists.txt | 4 +- mlir/lib/CAPI/Standard/CMakeLists.txt | 2 +- .../AffineToStandard/CMakeLists.txt | 4 +- mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt | 2 +- .../lib/Conversion/GPUToVulkan/CMakeLists.txt | 2 +- .../Conversion/LinalgToLLVM/CMakeLists.txt | 2 +- .../Conversion/LinalgToSPIRV/CMakeLists.txt | 2 +- .../LinalgToStandard/CMakeLists.txt | 2 +- mlir/lib/Conversion/SCFToGPU/CMakeLists.txt | 6 +-- mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt | 6 +-- .../Conversion/StandardToSPIRV/CMakeLists.txt | 2 +- mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt | 4 +- mlir/lib/Dialect/Affine/IR/CMakeLists.txt | 4 +- .../Dialect/Affine/Transforms/CMakeLists.txt | 4 +- mlir/lib/Dialect/Affine/Utils/CMakeLists.txt | 2 +- mlir/lib/Dialect/GPU/CMakeLists.txt | 2 +- .../Dialect/Linalg/Analysis/CMakeLists.txt | 6 +-- mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt | 6 +-- mlir/lib/Dialect/Linalg/IR/CMakeLists.txt | 4 +- .../Dialect/Linalg/Transforms/CMakeLists.txt | 6 +-- mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt | 6 +-- mlir/lib/Dialect/Quant/CMakeLists.txt | 2 +- mlir/lib/Dialect/SCF/CMakeLists.txt | 2 +- .../lib/Dialect/SCF/Transforms/CMakeLists.txt | 4 +- mlir/lib/Dialect/Shape/IR/CMakeLists.txt | 2 +- mlir/lib/Dialect/StandardOps/CMakeLists.txt | 2 +- .../StandardOps/Transforms/CMakeLists.txt | 2 +- mlir/lib/Dialect/Vector/CMakeLists.txt | 6 +-- mlir/lib/ExecutionEngine/CMakeLists.txt | 2 +- mlir/lib/Transforms/CMakeLists.txt | 4 +- mlir/lib/Transforms/Utils/CMakeLists.txt | 4 +- mlir/test/EDSC/CMakeLists.txt | 6 +-- mlir/test/lib/Dialect/Test/CMakeLists.txt | 2 +- mlir/test/lib/Transforms/CMakeLists.txt | 4 +- 36 files changed, 82 insertions(+), 91 deletions(-) diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index e104a8fd0d89e..07b87ef22ce92 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -30,7 +30,7 @@ add_flang_library(FortranLower MLIRAffineToStandard MLIRLLVMIR MLIRSCFToStandard - MLIRStandardOps + MLIRStandard LINK_COMPONENTS Support diff --git a/mlir/docs/Tutorials/CreatingADialect.md b/mlir/docs/Tutorials/CreatingADialect.md index 9f9eb7a8317b0..17d2ec97eb681 100644 --- a/mlir/docs/Tutorials/CreatingADialect.md +++ b/mlir/docs/Tutorials/CreatingADialect.md @@ -26,7 +26,7 @@ typically described in TableGen file using the [DDR format](DeclarativeRewrites.md). Note that dialect names should not generally be suffixed with “Ops”, -although some files pertaining to the operations of a dialect (e.g. +although some files pertaining only to the operations of a dialect (e.g. FooOps.cpp) might be. ## CMake best practices @@ -38,10 +38,8 @@ tablegen in a file FooOps.td. This file forms the core of a dialect and is declared using add_mlir_dialect(). ```cmake - add_mlir_dialect(FooOps foo) add_mlir_doc(FooOps -gen-dialect-doc FooDialect Dialects/) - ``` This generates the correct rules to run mlir-tblgen, along with a @@ -49,6 +47,7 @@ This generates the correct rules to run mlir-tblgen, along with a Dialect transformations are typically declared in a file FooTransforms.td. Targets for TableGen are described in typical llvm fashion. + ```cmake set(LLVM_TARGET_DEFINITIONS FooTransforms.td) mlir_tablegen(FooTransforms.h.inc -gen-rewriters) @@ -67,20 +66,18 @@ other dialect libraries. Typically this dependence is declared using target_link_libraries() and the PUBLIC keyword. For instance: ```cmake - -add_mlir_dialect_library(FooOps - DEPENDS - MLIRFooOpsIncGen - MLIRFooTransformsIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - BarOps - - ) - +add_mlir_dialect_library(MLIRFoo + DEPENDS + MLIRFooOpsIncGen + MLIRFooTransformsIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRBar + + ) ``` add_mlir_dialect_library() is a thin wrapper around add_llvm_library() @@ -90,9 +87,7 @@ access to all dialects. This list is also linked into libMLIR.so. The list can be retrieved from the MLIR_DIALECT_LIBS global property: ```cmake - get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) - ``` Note that although the Bar dialect also uses TableGen to declare its @@ -139,18 +134,16 @@ dialects (e.g. MLIRStandard). Typically this dependence is specified using target_link_libraries() and the PUBLIC keyword. For instance: ```cmake - add_mlir_conversion_library(MLIRBarToFoo - BarToFoo.cpp + BarToFoo.cpp - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/BarToFoo - - LINK_LIBS PUBLIC - BarOps - FooOps - ) + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/BarToFoo + LINK_LIBS PUBLIC + MLIRBar + MLIRFoo + ) ``` add_mlir_conversion_library() is a thin wrapper around @@ -161,9 +154,7 @@ is also linked in libMLIR.so. The list can be retrieved from the MLIR_CONVERSION_LIBS global property: ```cmake - get_property(dialect_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) - ``` Note that it is only necessary to specify a PUBLIC dependence against diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 524203b87068e..217a94995c0ab 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -21,7 +21,7 @@ add_mlir_library(MLIRAnalysis mlir-headers LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRCallInterfaces MLIRControlFlowInterfaces MLIRInferTypeOpInterface @@ -43,7 +43,7 @@ add_mlir_library(MLIRLoopAnalysis mlir-headers LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRCallInterfaces MLIRControlFlowInterfaces MLIRInferTypeOpInterface diff --git a/mlir/lib/CAPI/Standard/CMakeLists.txt b/mlir/lib/CAPI/Standard/CMakeLists.txt index 662841c2d2357..c8411666052ec 100644 --- a/mlir/lib/CAPI/Standard/CMakeLists.txt +++ b/mlir/lib/CAPI/Standard/CMakeLists.txt @@ -7,5 +7,5 @@ add_mlir_library(MLIRCAPIStandard LINK_LIBS PUBLIC MLIRCAPIIR - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt index 47a371fcea877..45c398195d156 100644 --- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt @@ -11,10 +11,10 @@ add_mlir_conversion_library(MLIRAffineToStandard Core LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRSCF MLIRPass - MLIRStandardOps + MLIRStandard MLIRTransforms MLIRIR ) diff --git a/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt index cce793fe5a6e0..2da9c709079f8 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt @@ -16,7 +16,7 @@ add_mlir_conversion_library(MLIRGPUToSPIRVTransforms MLIRPass MLIRSCFToSPIRV MLIRSPIRV - MLIRStandardOps + MLIRStandard MLIRStandardToSPIRVTransforms MLIRSupport MLIRTransforms diff --git a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt index b62f72fe91918..65733523531ad 100644 --- a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt @@ -12,7 +12,7 @@ add_mlir_conversion_library(MLIRGPUToVulkanTransforms MLIRPass MLIRSPIRV MLIRSPIRVSerialization - MLIRStandardOps + MLIRStandard MLIRSupport MLIRTransforms MLIRTranslation diff --git a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt index d507b413fbec4..9ae00bf1e80aa 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt @@ -15,7 +15,7 @@ add_mlir_conversion_library(MLIRLinalgToLLVM MLIRAffineToStandard MLIREDSC MLIRIR - MLIRLinalgOps + MLIRLinalg MLIRLLVMIR MLIRSCFToStandard MLIRStandardToLLVM diff --git a/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt index 98553ad967483..e76e9b7f50128 100644 --- a/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt @@ -11,7 +11,7 @@ add_mlir_conversion_library(MLIRLinalgToSPIRVTransforms LINK_LIBS PUBLIC MLIRIR - MLIRLinalgOps + MLIRLinalg MLIRLinalgUtils MLIRPass MLIRSPIRV diff --git a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt index 8cfb315bc6a16..b38a4b8e9f663 100644 --- a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt @@ -13,7 +13,7 @@ add_mlir_conversion_library(MLIRLinalgToStandard LINK_LIBS PUBLIC MLIREDSC MLIRIR - MLIRLinalgOps + MLIRLinalg MLIRPass MLIRSCF MLIRTransforms diff --git a/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt b/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt index 1da4dacd190e2..10fed819ca352 100644 --- a/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt @@ -9,13 +9,13 @@ add_mlir_conversion_library(MLIRSCFToGPU MLIRConversionPassIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAffineToStandard MLIRGPU MLIRIR - MLIRLinalgOps + MLIRLinalg MLIRPass - MLIRStandardOps + MLIRStandard MLIRSupport MLIRTransforms ) diff --git a/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt index 6d95813d717f7..1a38676277208 100644 --- a/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt @@ -8,13 +8,13 @@ add_mlir_conversion_library(MLIRSCFToSPIRV MLIRConversionPassIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAffineToStandard MLIRSPIRV MLIRIR - MLIRLinalgOps + MLIRLinalg MLIRPass - MLIRStandardOps + MLIRStandard MLIRSupport MLIRTransforms ) diff --git a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt index e60985984da32..5ccbcc6b39476 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt @@ -17,5 +17,5 @@ add_mlir_conversion_library(MLIRStandardToSPIRVTransforms MLIRSupport MLIRTransformUtils MLIRSPIRV - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt index a0e8b6f90a3cb..e753f4e5c0fcc 100644 --- a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt @@ -8,10 +8,10 @@ add_mlir_dialect_library(MLIRAffineEDSC MLIRAffineOpsIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIREDSC MLIRIR MLIRLoopLikeInterface MLIRSideEffectInterfaces - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt index 20bc863666687..03153389a33f1 100644 --- a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt @@ -1,4 +1,4 @@ -add_mlir_dialect_library(MLIRAffineOps +add_mlir_dialect_library(MLIRAffine AffineMemoryOpInterfaces.cpp AffineOps.cpp AffineValueMap.cpp @@ -15,5 +15,5 @@ add_mlir_dialect_library(MLIRAffineOps MLIRIR MLIRLoopLikeInterface MLIRSideEffectInterfaces - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt index c1d406ac08b4c..899f362536529 100644 --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -18,13 +18,13 @@ add_mlir_dialect_library(MLIRAffineTransforms MLIRLoopLikeInterfaceIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAffineUtils MLIREDSC MLIRIR MLIRPass MLIRSideEffectInterfaces - MLIRStandardOps + MLIRStandard MLIRTransformUtils MLIRVector MLIRVectorToLLVM diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt index 59ae13dcabcfd..e4a5d0bbd9f15 100644 --- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt @@ -5,6 +5,6 @@ add_mlir_dialect_library(MLIRAffineUtils ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index cdb06f44b6dc8..d62ea7a7362a4 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -21,7 +21,7 @@ add_mlir_dialect_library(MLIRGPU MLIRSCF MLIRPass MLIRSideEffectInterfaces - MLIRStandardOps + MLIRStandard MLIRSupport MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt index 5bb56236a04d0..b7c7a67fef1ba 100644 --- a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt @@ -1,11 +1,11 @@ add_mlir_dialect_library(MLIRLinalgAnalysis DependenceAnalysis.cpp - + ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg LINK_LIBS PUBLIC MLIRIR - MLIRLinalgOps - MLIRStandardOps + MLIRLinalg + MLIRStandard ) diff --git a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt index 91fdaa4f18a3e..d7f4fff3bc383 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt @@ -7,9 +7,9 @@ add_mlir_dialect_library(MLIRLinalgEDSC LINK_LIBS PUBLIC MLIREDSC MLIRIR - MLIRAffineOps + MLIRAffine MLIRAffineEDSC - MLIRLinalgOps + MLIRLinalg MLIRSCF - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt index 3cd3401ec9868..963260adad661 100644 --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -1,4 +1,4 @@ -add_mlir_dialect_library(MLIRLinalgOps +add_mlir_dialect_library(MLIRLinalg LinalgOps.cpp LinalgTypes.cpp @@ -14,5 +14,5 @@ add_mlir_dialect_library(MLIRLinalgOps MLIRIR MLIRSideEffectInterfaces MLIRViewLikeInterface - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 73cd9194fe6f2..a281aa55a44fb 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -17,18 +17,18 @@ add_mlir_dialect_library(MLIRLinalgTransforms MLIRLinalgPassIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAnalysis MLIREDSC MLIRIR MLIRLinalgAnalysis MLIRLinalgEDSC - MLIRLinalgOps + MLIRLinalg MLIRLinalgUtils MLIRSCF MLIRSCFTransforms MLIRPass - MLIRStandardOps + MLIRStandard MLIRStandardToLLVM MLIRTransformUtils MLIRVector diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt index 8b3e89768c55d..0d092ddae56a6 100644 --- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt @@ -5,13 +5,13 @@ add_mlir_dialect_library(MLIRLinalgUtils ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIREDSC MLIRIR MLIRLinalgEDSC - MLIRLinalgOps + MLIRLinalg MLIRSCF MLIRPass - MLIRStandardOps + MLIRStandard MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/Quant/CMakeLists.txt b/mlir/lib/Dialect/Quant/CMakeLists.txt index 130a415f21acc..f95b6ce4e568a 100644 --- a/mlir/lib/Dialect/Quant/CMakeLists.txt +++ b/mlir/lib/Dialect/Quant/CMakeLists.txt @@ -21,6 +21,6 @@ add_mlir_dialect_library(MLIRQuant MLIRPass MLIRSideEffectInterfaces MLIRSupport - MLIRStandardOps + MLIRStandard MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/SCF/CMakeLists.txt b/mlir/lib/Dialect/SCF/CMakeLists.txt index a4805102ddcb1..297e918cb6ab2 100644 --- a/mlir/lib/Dialect/SCF/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/CMakeLists.txt @@ -13,7 +13,7 @@ add_mlir_dialect_library(MLIRSCF MLIRIR MLIRLoopLikeInterface MLIRSideEffectInterfaces - MLIRStandardOps + MLIRStandard ) add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt index 341780c21c609..b3b20027896e1 100644 --- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt @@ -11,11 +11,11 @@ add_mlir_dialect_library(MLIRSCFTransforms MLIRSCFPassIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRIR MLIRPass MLIRSCF - MLIRStandardOps + MLIRStandard MLIRSupport MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt index e39f1c770f29f..1ac5b3b1e8560 100644 --- a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt @@ -17,5 +17,5 @@ add_mlir_dialect_library(MLIRShape MLIRInferTypeOpInterface MLIRIR MLIRSideEffectInterfaces - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/lib/Dialect/StandardOps/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/CMakeLists.txt index 06284f5d1daa2..e5188ecd59c14 100644 --- a/mlir/lib/Dialect/StandardOps/CMakeLists.txt +++ b/mlir/lib/Dialect/StandardOps/CMakeLists.txt @@ -1,4 +1,4 @@ -add_mlir_dialect_library(MLIRStandardOps +add_mlir_dialect_library(MLIRStandard IR/Ops.cpp EDSC/Builders.cpp EDSC/Intrinsics.cpp diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt index 299fc2bd3ccd0..d1204df2de762 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt @@ -12,6 +12,6 @@ add_mlir_dialect_library(MLIRStandardOpsTransforms LINK_LIBS PUBLIC MLIRIR MLIRPass - MLIRStandardOps + MLIRStandard MLIRTransforms ) diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt index 1087feba7fbdb..7c8c58e3fbfb5 100644 --- a/mlir/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/CMakeLists.txt @@ -14,9 +14,9 @@ add_mlir_dialect_library(MLIRVector MLIRAffineEDSC MLIREDSC MLIRIR - MLIRStandardOps - MLIRAffineOps - MLIRLinalgOps + MLIRStandard + MLIRAffine + MLIRLinalg MLIRSCF MLIRLoopAnalysis MLIRSideEffectInterfaces diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index 16258ed18b686..c71caf06ee09a 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -60,7 +60,7 @@ add_mlir_library(MLIRJitRunner MLIRExecutionEngine MLIRIR MLIRParser - MLIRStandardOps + MLIRStandard MLIRTargetLLVMIR MLIRTransforms MLIRStandardToLLVM diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt index 58c5fa6720883..8a057e397f75e 100644 --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -30,10 +30,10 @@ add_mlir_library(MLIRTransforms MLIRTransformsPassIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAnalysis MLIRCopyOpInterface - MLIRLinalgOps + MLIRLinalg MLIRLoopLikeInterface MLIRSCF MLIRPass diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt index 3fc45a8c66764..9fa59bbde55a9 100644 --- a/mlir/lib/Transforms/Utils/CMakeLists.txt +++ b/mlir/lib/Transforms/Utils/CMakeLists.txt @@ -14,10 +14,10 @@ add_mlir_library(MLIRTransformUtils MLIRStandardOpsIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAnalysis MLIRLoopAnalysis MLIRSCF MLIRPass - MLIRStandardOps + MLIRStandard ) diff --git a/mlir/test/EDSC/CMakeLists.txt b/mlir/test/EDSC/CMakeLists.txt index 96a89d3336007..5a73c250c7742 100644 --- a/mlir/test/EDSC/CMakeLists.txt +++ b/mlir/test/EDSC/CMakeLists.txt @@ -10,14 +10,14 @@ llvm_update_compile_flags(mlir-edsc-builder-api-test) target_link_libraries(mlir-edsc-builder-api-test PRIVATE - MLIRAffineOps + MLIRAffine MLIRAffineEDSC MLIREDSC MLIRIR + MLIRLinalg MLIRLinalgEDSC - MLIRLinalgOps MLIRSCF - MLIRStandardOps + MLIRStandard MLIRTransforms MLIRVector ) diff --git a/mlir/test/lib/Dialect/Test/CMakeLists.txt b/mlir/test/lib/Dialect/Test/CMakeLists.txt index b48d464e4317a..696b439929715 100644 --- a/mlir/test/lib/Dialect/Test/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Test/CMakeLists.txt @@ -38,7 +38,7 @@ add_mlir_library(MLIRTestDialect MLIRInferTypeOpInterface MLIRLinalgTransforms MLIRPass - MLIRStandardOps + MLIRStandard MLIRStandardOpsTransforms MLIRTransformUtils MLIRTransforms diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index 5bf606209ec2c..6aaedf14cf4a8 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -39,12 +39,12 @@ add_mlir_library(MLIRTestTransforms MLIRStandardOpsIncGen LINK_LIBS PUBLIC - MLIRAffineOps + MLIRAffine MLIRAnalysis MLIREDSC MLIRGPU MLIRGPUToGPURuntimeTransforms - MLIRLinalgOps + MLIRLinalg MLIRLinalgTransforms MLIRNVVMIR MLIRSCF From 4fb679d3b159f0a5e4ff87f4e7ecf44fbbf331b9 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 15:04:43 -0700 Subject: [PATCH 230/544] [flang] Fix Gw.d format output The estimation of the decimal exponent needs to allow for all 'd' of the requested significant digits. Also accept a plus sign on a "+kP" scaling factor in a format. Differential revision: https://reviews.llvm.org/D88618 --- flang/runtime/edit-output.cpp | 10 +++++----- flang/runtime/format-implementation.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp index 4680c81129ed2..4d27cb6320df0 100644 --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -330,17 +330,17 @@ bool RealOutputEditing::EditFOutput(const DataEdit &edit) { template DataEdit RealOutputEditing::EditForGOutput(DataEdit edit) { edit.descriptor = 'E'; - if (!edit.width.has_value() || - (*edit.width > 0 && edit.digits.value_or(-1) == 0)) { + int significantDigits{ + edit.digits.value_or(BinaryFloatingPoint::decimalPrecision)}; // 'd' + if (!edit.width.has_value() || (*edit.width > 0 && significantDigits == 0)) { return edit; // Gw.0 -> Ew.0 for w > 0 } - decimal::ConversionToDecimalResult converted{Convert(1, edit)}; + decimal::ConversionToDecimalResult converted{ + Convert(significantDigits, edit)}; if (IsInfOrNaN(converted)) { return edit; } int expo{IsZero() ? 1 : converted.decimalExponent}; // 's' - int significantDigits{ - edit.digits.value_or(BinaryFloatingPoint::decimalPrecision)}; // 'd' if (expo < 0 || expo > significantDigits) { return edit; // Ew.d } diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h index ad8bbcbcdcbdf..91d80a7336019 100644 --- a/flang/runtime/format-implementation.h +++ b/flang/runtime/format-implementation.h @@ -97,7 +97,7 @@ int FormatControl::GetIntField( } int result{0}; bool negate{ch == '-'}; - if (negate) { + if (negate || ch == '+') { firstCh = '\0'; ch = PeekNext(); } From f0505534900bb1fcdee368136cd733aefd20ce39 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Wed, 30 Sep 2020 17:23:11 -0700 Subject: [PATCH 231/544] [mlir] Split Dialect::addOperations into two functions The current implementation uses a fold expression to add all of the operations at once. This is really nice, but apparently the lifetime of each of the AbstractOperation instances is for the entire expression which may lead to a stack overflow for large numbers of operations. This splits the method in two to allow for the lifetime of the AbstractOperation to be properly scoped. --- mlir/include/mlir/IR/Dialect.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index 6395b338bce56..5bd8a745edceb 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -150,10 +150,11 @@ class Dialect { /// This method is used by derived classes to add their operations to the set. /// template void addOperations() { - (void)std::initializer_list{ - 0, (addOperation(AbstractOperation::get(*this)), 0)...}; + (void)std::initializer_list{0, (addOperation(), 0)...}; + } + template void addOperation() { + addOperation(AbstractOperation::get(*this)); } - void addOperation(AbstractOperation opInfo); /// Register a set of type classes with this dialect. From 196c097bba8b0b3932f3fcdcd5310f78ebaa43a3 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 30 Sep 2020 18:03:02 -0700 Subject: [PATCH 232/544] [AArch64][GlobalISel] Clamp oversize FP arithmetic vectors. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 +- .../AArch64/GlobalISel/legalize-fp-arith.mir | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index b6a006eba53b9..4ca821322a918 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -184,7 +184,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalar(0, s32); getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG}) - .legalFor({s32, s64, v2s64, v4s32, v2s32}); + .legalFor({s32, s64, v2s64, v4s32, v2s32}) + .clampNumElements(0, v2s32, v4s32) + .clampNumElements(0, v2s64, v2s64); getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith.mir index a0be636dfe6ef..7c2406659f068 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith.mir @@ -73,3 +73,42 @@ body: | $q0 = COPY %2(<4 x s32>) ... +--- +name: test_fmul_v4s64 +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_fmul_v4s64 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s64>) = G_FMUL [[DEF]], [[DEF]] + ; CHECK: [[FMUL1:%[0-9]+]]:_(<2 x s64>) = G_FMUL [[DEF]], [[DEF]] + ; CHECK: $q0 = COPY [[FMUL]](<2 x s64>) + ; CHECK: $q1 = COPY [[FMUL1]](<2 x s64>) + %0:_(<4 x s64>) = G_IMPLICIT_DEF + %1:_(<4 x s64>) = G_IMPLICIT_DEF + %2:_(<4 x s64>) = G_FMUL %0, %1 + %uv1:_(<2 x s64>), %uv2:_(<2 x s64>) = G_UNMERGE_VALUES %2 + $q0 = COPY %uv1(<2 x s64>) + $q1 = COPY %uv2(<2 x s64>) + +... +--- +name: test_fmul_v8s32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_fmul_v8s32 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK: [[FMUL1:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK: $q0 = COPY [[FMUL]](<4 x s32>) + ; CHECK: $q1 = COPY [[FMUL1]](<4 x s32>) + %0:_(<8 x s32>) = G_IMPLICIT_DEF + %1:_(<8 x s32>) = G_IMPLICIT_DEF + %2:_(<8 x s32>) = G_FMUL %0, %1 + %uv1:_(<4 x s32>), %uv2:_(<4 x s32>) = G_UNMERGE_VALUES %2 + $q0 = COPY %uv1(<4 x s32>) + $q1 = COPY %uv2(<4 x s32>) +... From b656189e6a602aaf86714ccbf89d94f2ef05b644 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 30 Sep 2020 16:14:12 -0500 Subject: [PATCH 233/544] [flang][msvc] Avoid ReferenceVariantBase ctor ambiguity. NFC. Msvc reports the following error when a ReferenceVariantBase is constructed using an r-value reference or instantiated as std::vector template parameter. The error message is: ``` PFTBuilder.h(59,1): error C2665: 'std::variant<...>::variant': none of the 2 overloads could convert all the argument types variant(1248,1): message : could be 'std::variant<...>::variant(std::variant<...> &&) noexcept(false)' variant(1248,1): message : or 'std::variant<...>::variant(const std::variant<...> &) noexcept(false)' PFTBuilder.h(59,1): message : while trying to match the argument list '(common::Reference>)' ``` Work around the ambiguity by only taking `common::Reference` arguments in the constructor. That is, conversion to common::Reference has to be done be the caller instead of being done inside the ctor. Unfortunately, with this change clang/gcc (but not msvc) insist on that the ReferenceVariantBase is stored in a `std::initializer_list`-initialized variable before being used, like being passed to a function or returned. This patch is part of the series to make flang compilable with MS Visual Studio . Reviewed By: DavidTruby Differential Revision: https://reviews.llvm.org/D88109 --- flang/include/flang/Lower/PFTBuilder.h | 5 +++-- flang/lib/Lower/PFTBuilder.cpp | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h index 3230d36e0e9a8..044e6084330fa 100644 --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -55,8 +55,9 @@ class ReferenceVariantBase { using Ref = common::Reference>; ReferenceVariantBase() = delete; - template - ReferenceVariantBase(B &b) : u{Ref{b}} {} + ReferenceVariantBase(std::variant...> b) : u(b) {} + template + ReferenceVariantBase(Ref b) : u(b) {} template constexpr BaseType &get() const { diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index 7195086d8e3f2..349f76ee80ac5 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -64,8 +64,11 @@ struct UnwrapStmt> { class PFTBuilder { public: PFTBuilder(const semantics::SemanticsContext &semanticsContext) - : pgm{std::make_unique()}, - parentVariantStack{*pgm.get()}, semanticsContext{semanticsContext} {} + : pgm{std::make_unique()}, semanticsContext{ + semanticsContext} { + lower::pft::ParentVariant parent{*pgm.get()}; + parentVariantStack.push_back(parent); + } /// Get the result std::unique_ptr result() { return std::move(pgm); } @@ -905,11 +908,15 @@ class PFTDumper { template static lower::pft::FunctionLikeUnit::FunctionStatement getFunctionStmt(const T &func) { - return std::get>(func.t); + lower::pft::FunctionLikeUnit::FunctionStatement result{ + std::get>(func.t)}; + return result; } template static lower::pft::ModuleLikeUnit::ModuleStatement getModuleStmt(const T &mod) { - return std::get>(mod.t); + lower::pft::ModuleLikeUnit::ModuleStatement result{ + std::get>(mod.t)}; + return result; } static const semantics::Symbol *getSymbol( @@ -1078,7 +1085,8 @@ Fortran::lower::pft::FunctionLikeUnit::FunctionLikeUnit( const auto &ps{ std::get>>(func.t)}; if (ps.has_value()) { - beginStmt = ps.value(); + FunctionStatement begin{ps.value()}; + beginStmt = begin; symbol = getSymbol(beginStmt); processSymbolTable(*symbol->scope()); } else { From 6cd8511e5932e4a53b2bb7780f69489355fc7783 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 30 Sep 2020 17:21:57 -0700 Subject: [PATCH 234/544] [WebAssembly] New-style command support This adds support for new-style command support. In this mode, all exports are considered command entrypoints, and the linker inserts calls to `__wasm_call_ctors` and `__wasm_call_dtors` for all such entrypoints. This enables support for: - Command entrypoints taking arguments other than strings and return values other than `int`. - Multicall executables without requiring on the use of string-based command-line arguments. This new behavior is disabled when the input has an explicit call to `__wasm_call_ctors`, indicating code not expecting new-style command support. This change does mean that wasm-ld no longer supports DCE-ing the `__wasm_call_ctors` function when there are no calls to it. If there are no calls to it, and there are ctors present, we assume it's wasm-ld's job to insert the calls. This seems ok though, because if there are ctors present, the program is expecting them to be called. This change affects the init-fini-gc.ll test. --- lld/test/wasm/command-exports-no-tors.s | 54 +++++++++++ lld/test/wasm/command-exports.s | 113 ++++++++++++++++++++++++ lld/test/wasm/init-fini-gc.ll | 48 ---------- lld/test/wasm/init-fini-no-gc.ll | 85 ++++++++++++++++++ lld/wasm/Driver.cpp | 24 ++++- lld/wasm/InputChunks.h | 10 ++- lld/wasm/MarkLive.cpp | 64 +++++++++----- lld/wasm/Symbols.cpp | 1 + lld/wasm/Symbols.h | 4 + lld/wasm/Writer.cpp | 109 ++++++++++++++++++++++- 10 files changed, 438 insertions(+), 74 deletions(-) create mode 100644 lld/test/wasm/command-exports-no-tors.s create mode 100644 lld/test/wasm/command-exports.s delete mode 100644 lld/test/wasm/init-fini-gc.ll create mode 100644 lld/test/wasm/init-fini-no-gc.ll diff --git a/lld/test/wasm/command-exports-no-tors.s b/lld/test/wasm/command-exports-no-tors.s new file mode 100644 index 0000000000000..e00712bed538d --- /dev/null +++ b/lld/test/wasm/command-exports-no-tors.s @@ -0,0 +1,54 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --no-entry %t.o -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +# Like command-exports.s, but with no ctors or dtors, so there should be no +# __wasm_call_ctors, __cxa_atexit, or wrappers. + + .globl foo_i32 +foo_i32: + .functype foo_i32 (i32, i32) -> (i32) + local.get 0 + local.get 1 + i32.add + end_function + + .globl foo_f64 +foo_f64: + .functype foo_f64 (f64, f64) -> (f64) + local.get 0 + local.get 1 + f64.add + end_function + + .export_name foo_i32, foo_i32 + .export_name foo_f64, foo_f64 + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: memory +# CHECK-NEXT: Kind: MEMORY +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foo_i32 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foo_f64 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 1 + +# CHECK: - Type: CODE + +# CHECK: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 200020016A0B +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 20002001A00B + +# CHECK: - Type: CUSTOM +# CHECK-NEXT: Name: name +# CHECK-NEXT: FunctionNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: foo_i32 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: foo_f64 diff --git a/lld/test/wasm/command-exports.s b/lld/test/wasm/command-exports.s new file mode 100644 index 0000000000000..e1b47ce9658f9 --- /dev/null +++ b/lld/test/wasm/command-exports.s @@ -0,0 +1,113 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --no-entry %t.o -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +# This test defines a command with two exported functions, as well as a static +# constructor and a static destructor. Check that the exports, constructor, and +# destructor are all set up properly. + + .globl foo_i32 +foo_i32: + .functype foo_i32 (i32, i32) -> (i32) + local.get 0 + local.get 1 + i32.add + end_function + + .globl foo_f64 +foo_f64: + .functype foo_f64 (f64, f64) -> (f64) + local.get 0 + local.get 1 + f64.add + end_function + + .globl some_ctor +some_ctor: + .functype some_ctor () -> () + end_function + + .globl some_dtor +some_dtor: + .functype some_dtor () -> () + end_function + + .hidden __cxa_atexit + .globl __cxa_atexit +__cxa_atexit: + .functype __cxa_atexit (i32, i32, i32) -> (i32) + i32.const 0 + end_function + + .section .text..Lcall_dtors.1,"",@ +.Lcall_dtors.1: + .functype .Lcall_dtors.1 (i32) -> () + call some_dtor + end_function + + .section .text..Lregister_call_dtors.1,"",@ +.Lregister_call_dtors.1: + .functype .Lregister_call_dtors.1 () -> () + block + i32.const .Lcall_dtors.1 + i32.const 0 + i32.const 0 + call __cxa_atexit + i32.eqz + br_if 0 + unreachable +.LBB6_2: + end_block + end_function + + .section .init_array.1,"",@ + .p2align 2 + .int32 some_ctor + .int32 .Lregister_call_dtors.1 + .export_name foo_i32, foo_i32 + .export_name foo_f64, foo_f64 + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: memory +# CHECK-NEXT: Kind: MEMORY +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foo_i32 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 8 +# CHECK-NEXT: - Name: foo_f64 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 9 + +# CHECK: - Type: CODE + +# CHECK: - Index: 8 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 10002000200110010B +# CHECK-NEXT: - Index: 9 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 10002000200110020B + +# CHECK: - Type: CUSTOM +# CHECK-NEXT: Name: name +# CHECK-NEXT: FunctionNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: __wasm_call_ctors +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: foo_i32 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: foo_f64 +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: some_ctor +# CHECK-NEXT: - Index: 4 +# CHECK-NEXT: Name: some_dtor +# CHECK-NEXT: - Index: 5 +# CHECK-NEXT: Name: __cxa_atexit +# CHECK-NEXT: - Index: 6 +# CHECK-NEXT: Name: .Lcall_dtors.1 +# CHECK-NEXT: - Index: 7 +# CHECK-NEXT: Name: .Lregister_call_dtors.1 +# CHECK-NEXT: - Index: 8 +# CHECK-NEXT: Name: foo_i32.command_export +# CHECK-NEXT: - Index: 9 +# CHECK-NEXT: Name: foo_f64.command_export diff --git a/lld/test/wasm/init-fini-gc.ll b/lld/test/wasm/init-fini-gc.ll deleted file mode 100644 index 4b2c14bd68585..0000000000000 --- a/lld/test/wasm/init-fini-gc.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: wasm-ld %t.o -o %t.wasm -; RUN: obj2yaml %t.wasm | FileCheck %s - -; RUN: wasm-ld %t.o -o %t.wasm -; RUN: obj2yaml %t.wasm | FileCheck %s - -; RUN: wasm-ld --export=__wasm_call_ctors %t.o -o %t.export.wasm -; RUN: obj2yaml %t.export.wasm | FileCheck %s -check-prefix=EXPORT - -; Test that the __wasm_call_ctor function if not referenced - -target triple = "wasm32-unknown-unknown" - -define hidden void @_start() { -entry: - ret void -} - -define hidden void @func1() { -entry: - ret void -} - -define hidden void @func2() { -entry: - ret void -} - -define i32 @__cxa_atexit(i32 %func, i32 %arg, i32 %dso_handle) { - ret i32 0 -} - -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [ - { i32, void ()*, i8* } { i32 1, void ()* @func1, i8* null } -] - -@llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [ - { i32, void ()*, i8* } { i32 1, void ()* @func2, i8* null } -] - -; CHECK-NOT: __cxa_atexit -; CHECK-NOT: __wasm_call_ctors - -; EXPORT: __wasm_call_ctors -; EXPORT: func1 -; EXPORT: func2 -; EXPORT: __cxa_atexit diff --git a/lld/test/wasm/init-fini-no-gc.ll b/lld/test/wasm/init-fini-no-gc.ll new file mode 100644 index 0000000000000..62415686847e6 --- /dev/null +++ b/lld/test/wasm/init-fini-no-gc.ll @@ -0,0 +1,85 @@ +; RUN: llc -filetype=obj -o %t.o %s +; RUN: wasm-ld %t.o -o %t.wasm +; RUN: obj2yaml %t.wasm | FileCheck %s + +; RUN: wasm-ld --export=__wasm_call_ctors %t.o -o %t.export.wasm +; RUN: obj2yaml %t.export.wasm | FileCheck %s -check-prefix=EXPORT + +; Test that we emit wrappers and call __wasm_call_ctor when not referenced. + +target triple = "wasm32-unknown-unknown" + +define hidden void @_start() { +entry: + ret void +} + +define hidden void @func1() { +entry: + ret void +} + +define hidden void @func2() { +entry: + ret void +} + +define hidden i32 @__cxa_atexit(i32 %func, i32 %arg, i32 %dso_handle) { + ret i32 0 +} + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [ + { i32, void ()*, i8* } { i32 1, void ()* @func1, i8* null } +] + +@llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [ + { i32, void ()*, i8* } { i32 1, void ()* @func2, i8* null } +] + +; Check that we have exactly the needed exports: `memory` because that's +; currently on by default, and `_start`, because that's the default entrypoint. + +; CHECK: - Type: EXPORT +; CHECK-NEXT: Exports: +; CHECK-NEXT: - Name: memory +; CHECK-NEXT: Kind: MEMORY +; CHECK-NEXT: Index: 0 +; CHECK-NEXT: - Name: _start +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Index: 7 + +; Check the body of `_start`'s command-export wrapper. + +; CHECK: - Type: CODE + +; CHECK: - Index: 7 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 100010010B + +; Check the symbol table to ensure all the functions are here, and that +; index 7 above refers to the function we think it does. + +; CHECK: - Type: CUSTOM +; CHECK-NEXT: Name: name +; CHECK-NEXT: FunctionNames: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Name: __wasm_call_ctors +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Name: _start +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Name: func1 +; CHECK-NEXT: - Index: 3 +; CHECK-NEXT: Name: func2 +; CHECK-NEXT: - Index: 4 +; CHECK-NEXT: Name: __cxa_atexit +; CHECK-NEXT: - Index: 5 +; CHECK-NEXT: Name: .Lcall_dtors.1 +; CHECK-NEXT: - Index: 6 +; CHECK-NEXT: Name: .Lregister_call_dtors.1 +; CHECK-NEXT: - Index: 7 +; CHECK-NEXT: Name: _start.command_export + +; EXPORT: __wasm_call_ctors +; EXPORT: func1 +; EXPORT: func2 +; EXPORT: __cxa_atexit diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 9b5f6690ebf02..a6d26dcfcc430 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -572,7 +572,6 @@ static void createSyntheticSymbols() { make(nullSignature, "__wasm_apply_relocs")); } - if (config->isPic) { WasmSym::stackPointer = createUndefinedGlobal("__stack_pointer", config->is64.getValueOr(false) @@ -841,6 +840,29 @@ void LinkerDriver::link(ArrayRef argsArr) { config->entry); } + // If the user code defines a `__wasm_call_dtors` function, remember it so + // that we can call it from the command export wrappers. Unlike + // `__wasm_call_ctors` which we synthesize, `__wasm_call_dtors` is defined + // by libc/etc., because destructors are registered dynamically with + // `__cxa_atexit` and friends. + if (!config->relocatable && !config->shared && + !WasmSym::callCtors->isUsedInRegularObj && + WasmSym::callCtors->getName() != config->entry && + !config->exportedSymbols.count(WasmSym::callCtors->getName())) { + if (Symbol *callDtors = handleUndefined("__wasm_call_dtors")) { + if (auto *callDtorsFunc = dyn_cast(callDtors)) { + if (callDtorsFunc->signature && + (!callDtorsFunc->signature->Params.empty() || + !callDtorsFunc->signature->Returns.empty())) { + error("__wasm_call_dtors must have no argument or return values"); + } + WasmSym::callDtors = callDtorsFunc; + } else { + error("__wasm_call_dtors must be a function"); + } + } + } + createOptionalSymbols(); if (errorCount()) diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h index be91b19ed452c..e5671fb89237e 100644 --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -122,7 +122,10 @@ class InputSegment : public InputChunk { class InputFunction : public InputChunk { public: InputFunction(const WasmSignature &s, const WasmFunction *func, ObjFile *f) - : InputChunk(f, InputChunk::Function), signature(s), function(func) {} + : InputChunk(f, InputChunk::Function), signature(s), function(func), + exportName(func && func->ExportName.hasValue() + ? (*func->ExportName).str() + : llvm::Optional()) {} static bool classof(const InputChunk *c) { return c->kind() == InputChunk::Function || @@ -133,8 +136,10 @@ class InputFunction : public InputChunk { StringRef getName() const override { return function->SymbolName; } StringRef getDebugName() const override { return function->DebugName; } llvm::Optional getExportName() const { - return function ? function->ExportName : llvm::Optional(); + return exportName.hasValue() ? llvm::Optional(*exportName) + : llvm::Optional(); } + void setExportName(std::string exportName) { this->exportName = exportName; } uint32_t getComdat() const override { return function->Comdat; } uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); } uint32_t getFunctionCodeOffset() const { return function->CodeOffset; } @@ -172,6 +177,7 @@ class InputFunction : public InputChunk { } const WasmFunction *function; + llvm::Optional exportName; llvm::Optional functionIndex; llvm::Optional tableIndex; uint32_t compressedFuncSize = 0; diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 2764c88f492cf..2766eec07ecb3 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -44,6 +44,7 @@ class MarkLive { void enqueue(Symbol *sym); void markSymbol(Symbol *sym); void mark(); + bool isCallCtorsLive(); // A list of chunks to visit. SmallVector queue; @@ -58,22 +59,6 @@ void MarkLive::enqueue(Symbol *sym) { sym->markLive(); if (InputChunk *chunk = sym->getChunk()) queue.push_back(chunk); - - // The ctor functions are all referenced by the synthetic callCtors - // function. However, this function does not contain relocations so we - // have to manually mark the ctors as live if callCtors itself is live. - if (sym == WasmSym::callCtors) { - if (config->isPic) - enqueue(WasmSym::applyRelocs); - for (const ObjFile *obj : symtab->objectFiles) { - const WasmLinkingData &l = obj->getWasmObj()->linkingData(); - for (const WasmInitFunc &f : l.InitFunctions) { - auto* initSym = obj->getFunctionSymbol(f.Symbol); - if (!initSym->isDiscarded()) - enqueue(initSym); - } - } - } } void MarkLive::run() { @@ -86,16 +71,29 @@ void MarkLive::run() { if (sym->isNoStrip() || sym->isExported()) enqueue(sym); - // For relocatable output, we need to preserve all the ctor functions - if (config->relocatable) { - for (const ObjFile *obj : symtab->objectFiles) { - const WasmLinkingData &l = obj->getWasmObj()->linkingData(); - for (const WasmInitFunc &f : l.InitFunctions) - enqueue(obj->getFunctionSymbol(f.Symbol)); + // If we'll be calling the user's `__wasm_call_dtors` function, mark it live. + if (Symbol *callDtors = WasmSym::callDtors) + enqueue(callDtors); + + // The ctor functions are all referenced by the synthetic callCtors + // function. However, this function does not contain relocations so we + // have to manually mark the ctors as live. + for (const ObjFile *obj : symtab->objectFiles) { + const WasmLinkingData &l = obj->getWasmObj()->linkingData(); + for (const WasmInitFunc &f : l.InitFunctions) { + auto *initSym = obj->getFunctionSymbol(f.Symbol); + if (!initSym->isDiscarded()) + enqueue(initSym); } } + // In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`. if (config->isPic) + enqueue(WasmSym::applyRelocs); + + // If we have any non-discarded init functions, mark `__wasm_call_ctors` as + // live so that we assign it an index and call it. + if (isCallCtorsLive()) enqueue(WasmSym::callCtors); if (config->sharedMemory && !config->shared) @@ -169,5 +167,27 @@ void markLive() { } } +bool MarkLive::isCallCtorsLive() { + // In a reloctable link, we don't call `__wasm_call_ctors`. + if (config->relocatable) + return false; + + // In Emscripten-style PIC, we call `__wasm_call_ctors` which calls + // `__wasm_apply_relocs`. + if (config->isPic) + return true; + + // If there are any init functions, mark `__wasm_call_ctors` live so that + // it can call them. + for (const ObjFile *file : symtab->objectFiles) { + const WasmLinkingData &l = file->getWasmObj()->linkingData(); + for (const WasmInitFunc &f : l.InitFunctions) + if (!file->getFunctionSymbol(f.Symbol)->isDiscarded()) + return true; + } + + return false; +} + } // namespace wasm } // namespace lld diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 4b40f95ed52db..d69ef00329c92 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -66,6 +66,7 @@ std::string toString(wasm::Symbol::Kind kind) { namespace wasm { DefinedFunction *WasmSym::callCtors; +DefinedFunction *WasmSym::callDtors; DefinedFunction *WasmSym::initMemory; DefinedFunction *WasmSym::applyRelocs; DefinedFunction *WasmSym::initTLS; diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index eed481a0b44da..69195e414b471 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -471,6 +471,10 @@ struct WasmSym { // Function that directly calls all ctors in priority order. static DefinedFunction *callCtors; + // __wasm_call_dtors + // Function that calls the libc/etc. cleanup function. + static DefinedFunction *callDtors; + // __wasm_apply_relocs // Function that applies relocations to data segment post-instantiation. static DefinedFunction *applyRelocs; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 1d669ca7a723c..fee87f292c908 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -62,6 +62,8 @@ class Writer { void createApplyRelocationsFunction(); void createCallCtorsFunction(); void createInitTLSFunction(); + void createCommandExportWrappers(); + void createCommandExportWrapper(uint32_t functionIndex, DefinedFunction *f); void assignIndexes(); void populateSymtab(); @@ -95,6 +97,9 @@ class Writer { std::vector initFunctions; llvm::StringMap> customSectionMapping; + // Stable storage for command export wrapper function name strings. + std::list commandExportWrapperNames; + // Elements that are used to construct the final output std::string header; std::vector outputSections; @@ -640,6 +645,53 @@ void Writer::calculateTypes() { out.typeSec->registerType(e->signature); } +// In a command-style link, create a wrapper for each exported symbol +// which calls the constructors and destructors. +void Writer::createCommandExportWrappers() { + // This logic doesn't currently support Emscripten-style PIC mode. + assert(!config->isPic); + + // If there are no ctors and there's no libc `__wasm_call_dtors` to + // call, don't wrap the exports. + if (initFunctions.empty() && WasmSym::callDtors == NULL) + return; + + std::vector toWrap; + + for (Symbol *sym : symtab->getSymbols()) + if (sym->isExported()) + if (auto *f = dyn_cast(sym)) + toWrap.push_back(f); + + for (auto *f : toWrap) { + auto funcNameStr = (f->getName() + ".command_export").str(); + commandExportWrapperNames.push_back(funcNameStr); + const std::string &funcName = commandExportWrapperNames.back(); + + auto func = make(*f->getSignature(), funcName); + if (f->function->getExportName().hasValue()) + func->setExportName(f->function->getExportName()->str()); + else + func->setExportName(f->getName().str()); + + DefinedFunction *def = + symtab->addSyntheticFunction(funcName, f->flags, func); + def->markLive(); + + def->flags |= WASM_SYMBOL_EXPORTED; + def->flags &= ~WASM_SYMBOL_VISIBILITY_HIDDEN; + def->forceExport = f->forceExport; + + f->flags |= WASM_SYMBOL_VISIBILITY_HIDDEN; + f->flags &= ~WASM_SYMBOL_EXPORTED; + f->forceExport = false; + + out.functionSec->addFunction(func); + + createCommandExportWrapper(f->getFunctionIndex(), def); + } +} + static void scanRelocations() { for (ObjFile *file : symtab->objectFiles) { LLVM_DEBUG(dbgs() << "scanRelocations: " << file->getName() << "\n"); @@ -925,7 +977,10 @@ void Writer::createApplyRelocationsFunction() { // Create synthetic "__wasm_call_ctors" function based on ctor functions // in input object. void Writer::createCallCtorsFunction() { - if (!WasmSym::callCtors->isLive()) + // If __wasm_call_ctors isn't referenced, there aren't any ctors, and we + // aren't calling `__wasm_apply_relocs` for Emscripten-style PIC, don't + // define the `__wasm_call_ctors` function. + if (!WasmSym::callCtors->isLive() && initFunctions.empty() && !config->isPic) return; // First write the body's contents to a string. @@ -954,6 +1009,46 @@ void Writer::createCallCtorsFunction() { createFunction(WasmSym::callCtors, bodyContent); } +// Create a wrapper around a function export which calls the +// static constructors and destructors. +void Writer::createCommandExportWrapper(uint32_t functionIndex, + DefinedFunction *f) { + // First write the body's contents to a string. + std::string bodyContent; + { + raw_string_ostream os(bodyContent); + writeUleb128(os, 0, "num locals"); + + // If we have any ctors, or we're calling `__wasm_apply_relocs` for + // Emscripten-style PIC, call `__wasm_call_ctors` which performs those + // calls. + if (!initFunctions.empty() || config->isPic) { + writeU8(os, WASM_OPCODE_CALL, "CALL"); + writeUleb128(os, WasmSym::callCtors->getFunctionIndex(), + "function index"); + } + + // Call the user's code, leaving any return values on the operand stack. + for (size_t i = 0; i < f->signature->Params.size(); ++i) { + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, i, "local index"); + } + writeU8(os, WASM_OPCODE_CALL, "CALL"); + writeUleb128(os, functionIndex, "function index"); + + // Call the function that calls the destructors. + if (DefinedFunction *callDtors = WasmSym::callDtors) { + writeU8(os, WASM_OPCODE_CALL, "CALL"); + writeUleb128(os, callDtors->getFunctionIndex(), "function index"); + } + + // End the function, returning the return values from the user's code. + writeU8(os, WASM_OPCODE_END, "END"); + } + + createFunction(f, bodyContent); +} + void Writer::createInitTLSFunction() { if (!WasmSym::initTLS->isLive()) return; @@ -1090,6 +1185,18 @@ void Writer::run() { if (config->isPic) createApplyRelocationsFunction(); createCallCtorsFunction(); + + // Create export wrappers for commands if needed. + // + // If the input contains a call to `__wasm_call_ctors`, either in one of + // the input objects or an explicit export from the command-line, we + // assume ctors and dtors are taken care of already. + if (!config->relocatable && !config->isPic && + !WasmSym::callCtors->isUsedInRegularObj && + !WasmSym::callCtors->isExported()) { + log("-- createCommandExportWrappers"); + createCommandExportWrappers(); + } } if (!config->relocatable && config->sharedMemory && !config->shared) From d4a1db4f3fd7ce701454127465dd0ddbdb7face2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 30 Sep 2020 20:55:44 -0500 Subject: [PATCH 235/544] [flang][msvc] Workaround 'forgotten' symbols in FoldOperation. NFC. This resolves an issue where the Microsoft compiler 'forgets' symbols when using constexpr in a lambda in a templated function. The symbols are: 1. The implicit lambda captures `context` and `convert`. Fix by making them explicit captures. The error message was: ``` fold-implementation.h(1220): error C2065: 'convert': undeclared identifier ``` 2. The function template argument FROMCAT. Fix by storing it in a temporary constexpr variable inside the function. The error message was: ``` fold-implementation.h(1216): error C2065: 'FROMCAT': undeclared identifier ``` This patch is part of the series to make flang compilable with MS Visual Studio . Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D88504 --- flang/lib/Evaluate/fold-implementation.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h index bb5463e697fe1..8178b277d13b0 100644 --- a/flang/lib/Evaluate/fold-implementation.h +++ b/flang/lib/Evaluate/fold-implementation.h @@ -1155,8 +1155,11 @@ Expr FoldOperation( return *array; } return std::visit( - [&](auto &kindExpr) -> Expr { + [&context, &convert](auto &kindExpr) -> Expr { using Operand = ResultType; + // This variable is a workaround for msvc which emits an error when + // using the FROMCAT template parameter below. + TypeCategory constexpr FromCat{FROMCAT}; char buffer[64]; if (auto value{GetScalarConstantValue(kindExpr)}) { if constexpr (TO::category == TypeCategory::Integer) { @@ -1213,7 +1216,7 @@ Expr FoldOperation( return Expr{value->IsTrue()}; } } else if constexpr (std::is_same_v && - FROMCAT != TypeCategory::Character) { + FromCat != TypeCategory::Character) { return std::move(kindExpr); // remove needless conversion } return Expr{std::move(convert)}; From 12bdd427b33a75bd7abb5d4cb095d0b983328034 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Sep 2020 19:31:08 -0700 Subject: [PATCH 236/544] [APFloat] Improve asserts in isSignificandAllOnes and isSignificandAllZeros so they protect shift operations from undefined behavior. For example, the assert in isSignificandAllZeros allowed NumHighBits to be integerPartWidth. But since it is used directly as a shift amount it must be less than integerPartWidth. --- llvm/lib/Support/APFloat.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index c5adbe9cf746a..58e49b5384cd5 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -850,8 +850,8 @@ bool IEEEFloat::isSignificandAllOnes() const { // Set the unused high bits to all ones when we compare. const unsigned NumHighBits = PartCount*integerPartWidth - semantics->precision + 1; - assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " - "fill than integerPartWidth"); + assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && + "Can not have more high bits to fill than integerPartWidth"); const integerPart HighBitFill = ~integerPart(0) << (integerPartWidth - NumHighBits); if (~(Parts[PartCount - 1] | HighBitFill)) @@ -870,9 +870,10 @@ bool IEEEFloat::isSignificandAllZeros() const { if (Parts[i]) return false; + // Compute how many bits are used in the final word. const unsigned NumHighBits = PartCount*integerPartWidth - semantics->precision + 1; - assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + assert(NumHighBits < integerPartWidth && "Can not have more high bits to " "clear than integerPartWidth"); const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; From 4e9277eda1874ead60f2c9d7cdb558fd19b32076 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 30 Sep 2020 20:09:25 -0700 Subject: [PATCH 237/544] [ELF] --wrap: don't unnecessarily expose __real_ The routing rules are: sym -> __wrap_sym __real_sym -> sym __wrap_sym and sym are routing targets, so they need to be exposed to the symbol table. __real_sym is not and can be eliminated if not used by regular object. --- lld/ELF/Driver.cpp | 2 +- lld/test/ELF/lto/wrap-1.ll | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 5e80385837cec..fa39628a21432 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1867,7 +1867,7 @@ static std::vector addWrappedSymbols(opt::InputArgList &args) { if (!sym) continue; - Symbol *real = addUndefined(saver.save("__real_" + name)); + Symbol *real = addUnusedUndefined(saver.save("__real_" + name)); Symbol *wrap = addUnusedUndefined(saver.save("__wrap_" + name)); v.push_back({sym, real, wrap}); diff --git a/lld/test/ELF/lto/wrap-1.ll b/lld/test/ELF/lto/wrap-1.ll index 5ff46274d0879..5355df2224259 100644 --- a/lld/test/ELF/lto/wrap-1.ll +++ b/lld/test/ELF/lto/wrap-1.ll @@ -17,11 +17,12 @@ ; CHECK-NEXT: Binding: Global ; CHECK-NEXT: Type: Function -; Make sure that the 'r' (linker redefined) bit is set for bar and __wrap_bar -; in the resolutions file. -; RESOLS: ,bar,xr -; RESOLS: ,__wrap_bar,plx -; RESOLS: ,__real_bar,plxr +; Make sure that the 'r' (linker redefined) bit is set for bar and __real_bar +; in the resolutions file. The calls to bar and __real_bar will be routed to +; __wrap_bar and bar, respectively. So they cannot be inlined. +; RESOLS: ,bar,xr{{$}} +; RESOLS: ,__wrap_bar,plx{{$}} +; RESOLS: ,__real_bar,plr{{$}} target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" From 2c9dc7bbbf514b1ed7bdefacb3213beae5916b3d Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 30 Sep 2020 23:15:35 -0400 Subject: [PATCH 238/544] Revert "[llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking." This reverts commit 4fcd1a8e6528ca42fe656f2745e15d2b7f5de495 as `llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s` failed on hosts without LBR supported if the build has LIBPFM enabled. On that host, `perf_event_open` fails with `EOPNOTSUPP` on LBR config. That change's basic assumption > If this is run on a non-supported hardware, it will produce all zeroes for latency. could not stand as `perf_event_open` system call will fail if the underlying hardware really don't have LBR supported. --- .../tools/llvm-exegesis/X86/lbr/lit.local.cfg | 4 +- llvm/tools/llvm-exegesis/lib/Target.h | 5 -- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 17 ----- .../llvm-exegesis/lib/X86/X86Counter.cpp | 65 ++++--------------- llvm/tools/llvm-exegesis/lib/X86/X86Counter.h | 5 -- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 15 ++++- 6 files changed, 25 insertions(+), 86 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg index 69b08f27c39a5..431967c1ec9b0 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg @@ -19,9 +19,9 @@ else: try: with open(os.devnull, 'w') as quiet: check_llvm_exegesis_uops_result = subprocess.call( - [llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) + [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) check_llvm_exegesis_latency_result = subprocess.call( - [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) + [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) except OSError: print('could not exec llvm-exegesis') config.unsupported = True diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 8a5624b42803a..70890795426d9 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -142,11 +142,6 @@ class ExegesisTarget { return {&Instr}; } - // Checks hardware and software support for current benchmark mode. - // Returns an error if the target host does not have support to run the - // benchmark. - virtual Error checkFeatureSupport() const { return Error::success(); } - // Creates a snippet generator for the given mode. std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 270825a8777ba..9f045fa11aa24 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -674,23 +674,6 @@ class ExegesisX86Target : public ExegesisTarget { return Arch == Triple::x86_64 || Arch == Triple::x86; } - Error checkFeatureSupport() const override { - // LBR is the only feature we conditionally support now. - // So if LBR is not requested, then we should be able to run the benchmarks. - if (LbrSamplingPeriod == 0) - return Error::success(); - -#if defined(__linux__) && defined(HAVE_LIBPFM) && \ - defined(LIBPFM_HAS_FIELD_CYCLES) - // If the kernel supports it, the hardware still may not have it. - return X86LbrCounter::checkLbrSupport(); -#else - return llvm::make_error( - "LBR not supported on this kernel and/or platform", - llvm::errc::not_supported); -#endif - } - static const unsigned kUnavailableRegisters[4]; }; diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp index 25ec4f8586755..57b493818aaad 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp @@ -21,7 +21,6 @@ #endif // HAVE_LIBPFM #include -#include #include #include #include @@ -36,8 +35,6 @@ namespace llvm { namespace exegesis { -// Number of entries in the LBR. -static constexpr int kLbrEntries = 16; static constexpr size_t kBufferPages = 8; static const size_t kDataBufferSize = kBufferPages * getpagesize(); @@ -73,6 +70,7 @@ static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail, static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize, const void *From, const void *To, llvm::SmallVector *CycleArray) { + assert(From != nullptr && To != nullptr); const char *DataPtr = DataBuf; while (DataPtr < DataBuf + DataSize) { struct perf_event_header Header; @@ -151,47 +149,21 @@ void X86LbrCounter::start() { ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */); } -llvm::Error X86LbrCounter::checkLbrSupport() { - // Do a sample read and check if the results contain non-zero values. - - X86LbrCounter counter(X86LbrPerfEvent(123)); - counter.start(); - - // Prevent the compiler from unrolling the loop and get rid of all the - // branches. We need at least 16 iterations. - int Sum = 0; - int V = 1; - - volatile int *P = &V; - auto TimeLimit = - std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5); - - for (int I = 0; - I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit; - ++I) { - Sum += *P; - } - - counter.stop(); - - auto ResultOrError = counter.doReadCounter(nullptr, nullptr); - if (ResultOrError) - if (!ResultOrError.get().empty()) - // If there is at least one non-zero entry, then LBR is supported. - for (const int64_t &Value : ResultOrError.get()) - if (Value != 0) - return Error::success(); - - return llvm::make_error( - "LBR format with cycles is not suppported on the host.", - llvm::errc::not_supported); -} - llvm::Expected> X86LbrCounter::readOrError(StringRef FunctionBytes) const { + // The max number of time-outs/retries before we give up. + static constexpr int kMaxTimeouts = 160; + // Disable the event before reading ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); + // Parses the LBR buffer and fills CycleArray with the sequence of cycle + // counts from the buffer. + llvm::SmallVector CycleArray; + std::unique_ptr DataBuf(new char[kDataBufferSize]); + int NumTimeouts = 0; + int PollResult = 0; + // Find the boundary of the function so that we could filter the LBRs // to keep only the relevant records. if (FunctionBytes.empty()) @@ -200,21 +172,6 @@ X86LbrCounter::readOrError(StringRef FunctionBytes) const { const void *From = reinterpret_cast(FunctionBytes.data()); const void *To = reinterpret_cast(FunctionBytes.data() + FunctionBytes.size()); - return doReadCounter(From, To); -} - -llvm::Expected> -X86LbrCounter::doReadCounter(const void *From, const void *To) const { - // The max number of time-outs/retries before we give up. - static constexpr int kMaxTimeouts = 160; - - // Parses the LBR buffer and fills CycleArray with the sequence of cycle - // counts from the buffer. - llvm::SmallVector CycleArray; - auto DataBuf = std::make_unique(kDataBufferSize); - int NumTimeouts = 0; - int PollResult = 0; - while (PollResult <= 0) { PollResult = pollLbrPerfEvent(FileDescriptor); if (PollResult > 0) diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h index 73e4dc5b990a0..94062012917df 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h @@ -33,8 +33,6 @@ class X86LbrPerfEvent : public pfm::PerfEvent { class X86LbrCounter : public pfm::Counter { public: - static llvm::Error checkLbrSupport(); - explicit X86LbrCounter(pfm::PerfEvent &&Event); virtual ~X86LbrCounter(); @@ -45,9 +43,6 @@ class X86LbrCounter : public pfm::Counter { readOrError(StringRef FunctionBytes) const override; private: - llvm::Expected> - doReadCounter(const void *From, const void *To) const; - void *MMappedBuffer = nullptr; }; diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index bc2f348a7eaeb..fb3f41e147348 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -160,6 +160,12 @@ static cl::opt cl::desc(""), cl::cat(AnalysisOptions), cl::init("")); +static cl::list + AllowedHostCpus("allowed-host-cpu", + cl::desc("If specified, only run the benchmark if the host " + "CPU matches the names"), + cl::cat(Options), cl::ZeroOrMore); + static cl::opt AnalysisDisplayUnstableOpcodes( "analysis-display-unstable-clusters", cl::desc("if there is more than one benchmark for an opcode, said " @@ -296,9 +302,12 @@ void benchmarkMain() { const LLVMState State(CpuName); - // Preliminary check to ensure features needed for requested - // benchmark mode are present on target CPU and/or OS. - ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); + llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU(); + for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end(); + ++Begin) { + if (ActualCpu != *Begin) + ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu)); + } const std::unique_ptr Runner = ExitOnErr(State.getExegesisTarget().createBenchmarkRunner( From c93a39dd1fdd74cb87ef65cfd42d81c62a07ed91 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 1 Oct 2020 12:11:24 +0700 Subject: [PATCH 239/544] [SCEV][NFC] Introduce isKnownPredicateAt method We can query known predicates in different points, respecting their dominating conditions. --- llvm/include/llvm/Analysis/ScalarEvolution.h | 5 +++++ llvm/lib/Analysis/ScalarEvolution.cpp | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 4fc1ee08caf7d..febca473776aa 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -916,6 +916,11 @@ class ScalarEvolution { bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Test if the given expression is known to satisfy the condition described + /// by Pred, LHS, and RHS in the given Context. + bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const Instruction *Context); + /// Test if the condition described by Pred, LHS, RHS is known to be true on /// every iteration of the loop of the recurrency LHS. bool isKnownOnEveryIteration(ICmpInst::Predicate Pred, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8759f86e031d2..e51b31673105c 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9100,6 +9100,14 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); } +bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const Instruction *Context) { + // TODO: Analyze guards and assumes from Context's block. + return isKnownPredicate(Pred, LHS, RHS) || + isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS); +} + bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS) { From de973e0b07207a22d5ca04fd56fad6a40ced4172 Mon Sep 17 00:00:00 2001 From: Igor Chervatyuk Date: Thu, 24 Sep 2020 10:23:45 +0300 Subject: [PATCH 240/544] [RISCV][ASAN] implementation for previous/next pc routines for riscv64 [7/11] patch series to port ASAN for riscv64 Depends On D87575 Reviewed By: eugenis, vitalybuka, luismarques Differential Revision: https://reviews.llvm.org/D87577 --- .../sanitizer_common/sanitizer_stacktrace.cpp | 22 +++++++++++++++++++ .../sanitizer_common/sanitizer_stacktrace.h | 8 +++++++ 2 files changed, 30 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp index ef14fb704eed3..ca2f90a51c9e8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp @@ -21,6 +21,28 @@ uptr StackTrace::GetNextInstructionPc(uptr pc) { return pc + 8; #elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) return pc + 4; +#elif SANITIZER_RISCV64 + // Current check order is 4 -> 2 -> 6 -> 8 + u8 InsnByte = *(u8 *)(pc); + if (((InsnByte & 0x3) == 0x3) && ((InsnByte & 0x1c) != 0x1c)) { + // xxxxxxxxxxxbbb11 | 32 bit | bbb != 111 + return pc + 4; + } + if ((InsnByte & 0x3) != 0x3) { + // xxxxxxxxxxxxxxaa | 16 bit | aa != 11 + return pc + 2; + } + // RISC-V encoding allows instructions to be up to 8 bytes long + if ((InsnByte & 0x3f) == 0x1f) { + // xxxxxxxxxx011111 | 48 bit | + return pc + 6; + } + if ((InsnByte & 0x7f) == 0x3f) { + // xxxxxxxxx0111111 | 64 bit | + return pc + 8; + } + // bail-out if could not figure out the instruction size + return 0; #else return pc + 1; #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h index 4162b58a867de..9111acce0c60a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h @@ -85,6 +85,14 @@ uptr StackTrace::GetPreviousInstructionPc(uptr pc) { return pc - 4; #elif defined(__sparc__) || defined(__mips__) return pc - 8; +#elif SANITIZER_RISCV64 + // RV-64 has variable instruciton length... + // C extentions gives us 2-byte instructoins + // RV-64 has 4-byte instructions + // + RISCV architecture allows instructions up to 8 bytes + // It seems difficult to figure out the exact instruction length - + // pc - 2 seems like a safe option for the purposes of stack tracing + return pc - 2; #else return pc - 1; #endif From 3d27a99b2ed24e1951483cf13357ec188ad44bb0 Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Thu, 1 Oct 2020 10:20:16 +0500 Subject: [PATCH 241/544] [LLDB] Remove AArch64/Linux xfail decorator from TestGuiBasicDebug This test now passes on AArch64/Linux after following change by Jonas: d689570d7dcb16ee241676e22324dc456837eb23 --- lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py index 81067bf776e39..9deb700da39c6 100644 --- a/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py +++ b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py @@ -15,7 +15,6 @@ class TestGuiBasicDebugCommandTest(PExpectTest): # under ASAN on a loaded machine.. @skipIfAsan @skipIfCursesSupportMissing - @expectedFailureAll(archs=["aarch64"], oslist=["linux"]) def test_gui(self): self.build() From 71dcbe1e88b446ae7f405da1b3006b966ccc6ca6 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 30 Sep 2020 22:36:44 -0700 Subject: [PATCH 242/544] We don't need two different ways to get commit access, just simplify the policy here so that old SVN users and new contributors do the same thing. --- llvm/docs/DeveloperPolicy.rst | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index 0af0f1dc493b5..7db365e0bb5fd 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -298,11 +298,10 @@ omissions can be handled by sending a reply to the commits mailing list. Obtaining Commit Access ----------------------- -New Contributors -^^^^^^^^^^^^^^^^ We grant commit access to contributors with a track record of submitting high quality patches. If you would like commit access, please send an email to -`Chris `_ with your GitHub username. +`Chris `_ with your GitHub username. This is true +for former contributors with SVN access as well as new contributors. Prior to obtaining commit access, it is common practice to request that someone with commit access commits on your behalf. When doing so, please @@ -345,12 +344,6 @@ after they are committed, depending on the nature of the change). You are encouraged to review other peoples' patches as well, but you aren't required to do so. -Current Contributors - Transferring from SVN -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you had commit access to SVN and would like to request commit access to -GitHub, please email `llvm-admin `_ with your -SVN username and GitHub username. - .. _discuss the change/gather consensus: Making a Major Change From 1fedd90cc7a8deabf7d75d3e668bd56ce9b1ffcc Mon Sep 17 00:00:00 2001 From: Andrew Dona-Couch Date: Thu, 1 Oct 2020 18:49:12 +1300 Subject: [PATCH 243/544] [AVR] fix interrupt stack pointer restoration This patch fixes a corruption of the stack pointer and several registers in any AVR interrupt with non-empty stack frame. Previously, the callee-saved registers were popped before restoring the stack pointer, causing the pointer math to use the wrong base value while also corrupting the caller's register. This change fixes the code to restore the stack pointer last before exiting the interrupt service routine. https://bugs.llvm.org/show_bug.cgi?id=47253 Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D87735 Patch by Andrew Dona-Couch. --- llvm/lib/Target/AVR/AVRFrameLowering.cpp | 33 +++++++++++++++------- llvm/test/CodeGen/AVR/interrupts.ll | 35 ++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index c95a553b86acf..757b41466c3f7 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -131,6 +131,26 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } +static void restoreStatusRegister(MachineFunction &MF, MachineBasicBlock &MBB) { + const AVRMachineFunctionInfo *AFI = MF.getInfo(); + + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + + DebugLoc DL = MBBI->getDebugLoc(); + const AVRSubtarget &STI = MF.getSubtarget(); + const AVRInstrInfo &TII = *STI.getInstrInfo(); + + // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal + // handlers at the very end of the function, just before reti. + if (AFI->isInterruptOrSignalHandler()) { + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); + BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) + .addImm(0x3f) + .addReg(AVR::R0, RegState::Kill); + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); + } +} + void AVRFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const AVRMachineFunctionInfo *AFI = MF.getInfo(); @@ -151,18 +171,9 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, const AVRSubtarget &STI = MF.getSubtarget(); const AVRInstrInfo &TII = *STI.getInstrInfo(); - // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal - // handlers at the very end of the function, just before reti. - if (AFI->isInterruptOrSignalHandler()) { - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); - BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) - .addImm(0x3f) - .addReg(AVR::R0, RegState::Kill); - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); - } - // Early exit if there is no need to restore the frame pointer. if (!FrameSize) { + restoreStatusRegister(MF, MBB); return; } @@ -198,6 +209,8 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, // Write back R29R28 to SP and temporarily disable interrupts. BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP) .addReg(AVR::R29R28, RegState::Kill); + + restoreStatusRegister(MF, MBB); } // Return true if the specified function should have a dedicated frame diff --git a/llvm/test/CodeGen/AVR/interrupts.ll b/llvm/test/CodeGen/AVR/interrupts.ll index b402d867e12b2..c6550a0fb6ae5 100644 --- a/llvm/test/CodeGen/AVR/interrupts.ll +++ b/llvm/test/CodeGen/AVR/interrupts.ll @@ -64,5 +64,40 @@ define void @signal_handler_via_attribute() #1 { ret void } +define avr_intrcc void @interrupt_alloca() { +; CHECK-LABEL: interrupt_alloca: +; CHECK: sei +; CHECK-NEXT: push r0 +; CHECK-NEXT: push r1 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: push r0 +; CHECK: clr r0 +; CHECK: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: sbiw r28, 1 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, r28 +; CHECK: adiw r28, 1 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK: pop r0 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: pop r1 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: reti + alloca i8 + ret void +} + attributes #0 = { "interrupt" } attributes #1 = { "signal" } From da11479fd1fa62e59a16790f4dc2d80c9facf2da Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 18 Sep 2020 11:33:16 -0700 Subject: [PATCH 244/544] [AArch64][GlobalISel] Select all-zero G_BUILD_VECTOR into a zero mov. Unfortunately the leaf SDAG patterns aren't supported yet so we need to do this manually, but it's not a significant amount of code anyway. Differential Revision: https://reviews.llvm.org/D87924 --- .../GISel/AArch64InstructionSelector.cpp | 23 ++++- .../GlobalISel/select-build-vector.mir | 96 +++++++++++++------ llvm/test/CodeGen/AArch64/arm64-vabs.ll | 8 +- llvm/test/CodeGen/AArch64/combine-loads.ll | 3 +- 4 files changed, 94 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 658ff94af2dc6..1daa2b29b9d54 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -4703,8 +4703,9 @@ bool AArch64InstructionSelector::selectInsertElt( bool AArch64InstructionSelector::tryOptConstantBuildVec( MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!"); - if (DstTy.getSizeInBits() < 32) + unsigned DstSize = DstTy.getSizeInBits(); + assert(DstSize <= 128 && "Unexpected build_vec type!"); + if (DstSize < 32) return false; // Check if we're building a constant vector, in which case we want to // generate a constant pool load instead of a vector insert sequence. @@ -4725,6 +4726,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec( } Constant *CV = ConstantVector::get(Csts); MachineIRBuilder MIB(I); + if (CV->isNullValue()) { + // Until the importer can support immAllZerosV in pattern leaf nodes, + // select a zero move manually here. + Register DstReg = I.getOperand(0).getReg(); + if (DstSize == 128) { + auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); + } else if (DstSize == 64) { + auto Mov = + MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) + .addImm(0); + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addReg(Mov.getReg(0), 0, AArch64::dsub); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); + } + } auto *CPLoad = emitLoadFromConstantPool(CV, MIB); if (!CPLoad) { LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir index d3ec9a8728752..af186b49af232 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir @@ -1,28 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- | - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64" - - define <4 x float> @test_f32(float %a, float %b, float %c, float %d) { - ret <4 x float> undef - } - - define <2 x double> @test_f64(double %a, double %b) { - ret <2 x double> undef - } - - define <4 x i32> @test_i32(i32 %a, i32 %b, i32 %c, i32 %d) { - ret <4 x i32> undef - } - - define <2 x i64> @test_i64(i64 %a, i64 %b) { - ret <2 x i64> undef - } - - define void @test_p0(i64 *%a, i64 *%b) { ret void } - -... --- name: test_f32 alignment: 4 @@ -33,7 +10,7 @@ selected: false failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $s0, $s1, $s2, $s3 ; CHECK-LABEL: name: test_f32 @@ -74,7 +51,7 @@ selected: false failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $d0, $d1, $d2, $d3 ; CHECK-LABEL: name: test_f64 @@ -105,7 +82,7 @@ selected: false failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $w0, $w1, $w2, $w3 ; CHECK-LABEL: name: test_i32 @@ -140,7 +117,7 @@ selected: false failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $x0, $x1 ; CHECK-LABEL: name: test_i64 @@ -169,7 +146,7 @@ selected: false failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $x0, $x1 ; CHECK-LABEL: name: test_p0 @@ -188,3 +165,66 @@ body: | RET_ReallyLR implicit $q0 ... +--- +name: test_v4s32_zero +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test_v4s32_zero + ; CHECK: liveins: $x0 + ; CHECK: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 + ; CHECK: $q0 = COPY [[MOVIv2d_ns]] + ; CHECK: RET_ReallyLR + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_CONSTANT i32 0 + %3:fpr(s32) = COPY %2(s32) + %4:fpr(s32) = COPY %2(s32) + %5:fpr(s32) = COPY %2(s32) + %6:fpr(s32) = COPY %2(s32) + %1:fpr(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR +... +--- +name: test_v8s8_zero +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test_v8s8_zero + ; CHECK: liveins: $x0 + ; CHECK: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub + ; CHECK: $d0 = COPY [[COPY]] + ; CHECK: RET_ReallyLR + %0:gpr(p0) = COPY $x0 + %2:gpr(s8) = G_CONSTANT i8 0 + %3:fpr(s8) = COPY %2(s8) + %4:fpr(s8) = COPY %2(s8) + %5:fpr(s8) = COPY %2(s8) + %6:fpr(s8) = COPY %2(s8) + %7:fpr(s8) = COPY %2(s8) + %8:fpr(s8) = COPY %2(s8) + %9:fpr(s8) = COPY %2(s8) + %10:fpr(s8) = COPY %2(s8) + %1:fpr(<8 x s8>) = G_BUILD_VECTOR %3(s8), %4(s8), %5(s8), %6(s8), %7(s8), %8(s8), %9(s8), %10(s8) + $d0 = COPY %1(<8 x s8>) + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index 2e59ffc90d337..636522901ba4c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -956,8 +956,8 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind { ; DAG: abs.2s ; DAG-NEXT: ret -; GISEL: neg.2s -; GISEL: cmge.2s +; GISEL-DAG: neg.2s +; GISEL-DAG: cmge.2s ; GISEL: bif.8b %tmp1neg = sub <2 x i32> zeroinitializer, %a %b = icmp sge <2 x i32> %a, zeroinitializer @@ -1035,8 +1035,8 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind { ; DAG: abs.2d ; DAG-NEXT: ret -; GISEL: neg.2d -; GISEL: cmge.2d +; GISEL-DAG: neg.2d +; GISEL-DAG: cmge.2d ; GISEL: bit.16b %tmp1neg = sub <2 x i64> zeroinitializer, %a %b = icmp sle <2 x i64> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll index c94751d77982f..2e88c3c82424a 100644 --- a/llvm/test/CodeGen/AArch64/combine-loads.ll +++ b/llvm/test/CodeGen/AArch64/combine-loads.ll @@ -4,8 +4,7 @@ define <2 x i64> @z(i64* nocapture nonnull readonly %p) { ; CHECK-LABEL: z: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: ldr x8, [x0, #8] ; CHECK-NEXT: mov v0.d[0], x9 From 1e8fbb3b745916160a35a6af4dfdba9bbe26c730 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 1 Oct 2020 00:05:15 -0700 Subject: [PATCH 245/544] [MC] Inline MCExpr::printVariantKind & remove UseParensForSymbolVariantBit Note, MAI may be nullptr in -show-encoding. --- llvm/include/llvm/MC/MCExpr.h | 20 ++++---------------- llvm/lib/MC/MCExpr.cpp | 19 ++++++++----------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index 3cc43e15db838..46e60d8f258d8 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -355,30 +355,20 @@ class MCSymbolRefExpr : public MCExpr { /// The symbol being referenced. const MCSymbol *Symbol; - // Subclass data stores VariantKind in bits 0..15, UseParensForSymbolVariant - // in bit 16 and HasSubsectionsViaSymbols in bit 17. + // Subclass data stores VariantKind in bits 0..15 and HasSubsectionsViaSymbols + // in bit 16. static const unsigned VariantKindBits = 16; static const unsigned VariantKindMask = (1 << VariantKindBits) - 1; - /// Specifies how the variant kind should be printed. - static const unsigned UseParensForSymbolVariantBit = 1 << VariantKindBits; - // FIXME: Remove this bit. - static const unsigned HasSubsectionsViaSymbolsBit = - 1 << (VariantKindBits + 1); + static const unsigned HasSubsectionsViaSymbolsBit = 1 << VariantKindBits; static unsigned encodeSubclassData(VariantKind Kind, - bool UseParensForSymbolVariant, - bool HasSubsectionsViaSymbols) { + bool HasSubsectionsViaSymbols) { return (unsigned)Kind | - (UseParensForSymbolVariant ? UseParensForSymbolVariantBit : 0) | (HasSubsectionsViaSymbols ? HasSubsectionsViaSymbolsBit : 0); } - bool useParensForSymbolVariant() const { - return (getSubclassData() & UseParensForSymbolVariantBit) != 0; - } - explicit MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind, const MCAsmInfo *MAI, SMLoc Loc = SMLoc()); @@ -405,8 +395,6 @@ class MCSymbolRefExpr : public MCExpr { return (VariantKind)(getSubclassData() & VariantKindMask); } - void printVariantKind(raw_ostream &OS) const; - bool hasSubsectionsViaSymbols() const { return (getSubclassData() & HasSubsectionsViaSymbolsBit) != 0; } diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 1953fd3fdd45f..b433277c3dc51 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -85,8 +85,13 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { } else Sym.print(OS, MAI); - if (SRE.getKind() != MCSymbolRefExpr::VK_None) - SRE.printVariantKind(OS); + const MCSymbolRefExpr::VariantKind Kind = SRE.getKind(); + if (Kind != MCSymbolRefExpr::VK_None) { + if (MAI && MAI->useParensForSymbolVariant()) // ARM + OS << '(' << MCSymbolRefExpr::getVariantKindName(Kind) << ')'; + else + OS << '@' << MCSymbolRefExpr::getVariantKindName(Kind); + } return; } @@ -197,8 +202,7 @@ const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx, MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind, const MCAsmInfo *MAI, SMLoc Loc) : MCExpr(MCExpr::SymbolRef, Loc, - encodeSubclassData(Kind, MAI->useParensForSymbolVariant(), - MAI->hasSubsectionsViaSymbols())), + encodeSubclassData(Kind, MAI->hasSubsectionsViaSymbols())), Symbol(Symbol) { assert(Symbol); } @@ -510,13 +514,6 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Default(VK_Invalid); } -void MCSymbolRefExpr::printVariantKind(raw_ostream &OS) const { - if (useParensForSymbolVariant()) - OS << '(' << MCSymbolRefExpr::getVariantKindName(getKind()) << ')'; - else - OS << '@' << MCSymbolRefExpr::getVariantKindName(getKind()); -} - /* *** */ void MCTargetExpr::anchor() {} From dfa2c14b8fe8166ff9ff951b8b70a2004401d0db Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 15:15:42 +0100 Subject: [PATCH 246/544] [ARM][LowOverheadLoops] Use iterator for InsertPt. Use a MachineBasicBlock::iterator instead of a MachineInstr* for the position of our LoopStart instruction. NFCish, as it change debug info. --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 103 ++++++++++-------- .../Thumb2/LowOverheadLoops/matrix-debug.mir | 2 +- 2 files changed, 60 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index f5fbe26f9f782..c86cf32357322 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -355,7 +355,8 @@ namespace { const TargetRegisterInfo &TRI; const ARMBaseInstrInfo &TII; MachineFunction *MF = nullptr; - MachineInstr *InsertPt = nullptr; + MachineBasicBlock::iterator StartInsertPt; + MachineBasicBlock *StartInsertBB = nullptr; MachineInstr *Start = nullptr; MachineInstr *Dec = nullptr; MachineInstr *End = nullptr; @@ -402,7 +403,7 @@ namespace { // Check that the predication in the loop will be equivalent once we // perform the conversion. Also ensure that we can provide the number // of elements to the loop start instruction. - bool ValidateTailPredicate(MachineInstr *StartInsertPt); + bool ValidateTailPredicate(); // Check that any values available outside of the loop will be the same // after tail predication conversion. @@ -585,10 +586,7 @@ static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA, return false; } -bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { - if (!StartInsertPt) - return false; - +bool LowOverheadLoop::ValidateTailPredicate() { if (!IsTailPredicationLegal()) { LLVM_DEBUG(if (VCTPs.empty()) dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n"; @@ -632,19 +630,19 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { // The element count register maybe defined after InsertPt, in which case we // need to try to move either InsertPt or the def so that the [w|d]lstp can // use the value. - MachineBasicBlock *InsertBB = StartInsertPt->getParent(); - if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) { - if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) { - if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) { + if (StartInsertPt != StartInsertBB->end() && + !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) { + if (auto *ElemDef = RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) { + if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) { ElemDef->removeFromParent(); - InsertBB->insert(MachineBasicBlock::iterator(StartInsertPt), ElemDef); + StartInsertBB->insert(StartInsertPt, ElemDef); LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " << *ElemDef); - } else if (RDA.isSafeToMoveBackwards(StartInsertPt, ElemDef)) { + } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) { StartInsertPt->removeFromParent(); - InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), - StartInsertPt); + StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), + &*StartInsertPt); LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); } else { // If we fail to move an instruction and the element count is provided @@ -653,7 +651,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { MachineOperand Operand = ElemDef->getOperand(1); if (isMovRegOpcode(ElemDef->getOpcode()) && RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) == - RDA.getUniqueReachingMIDef(StartInsertPt, Operand.getReg())) { + RDA.getUniqueReachingMIDef(&*StartInsertPt, Operand.getReg())) { TPNumElements = Operand; NumElements = TPNumElements.getReg(); } else { @@ -683,7 +681,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { return false; }; - if (CannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end())) + if (CannotInsertWDLSTPBetween(StartInsertPt, StartInsertBB->end())) return false; // Especially in the case of while loops, InsertBB may not be the @@ -704,7 +702,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { // Search backwards for a def, until we get to InsertBB. MachineBasicBlock *MBB = Preheader; - while (MBB && MBB != InsertBB) { + while (MBB && MBB != StartInsertBB) { if (CannotProvideElements(MBB, NumElements)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n"); return false; @@ -1017,10 +1015,15 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { // Find a suitable position to insert the loop start instruction. It needs to // be able to safely define LR. auto FindStartInsertionPoint = [](MachineInstr *Start, - ReachingDefAnalysis &RDA) -> MachineInstr* { + MachineBasicBlock::iterator &InsertPt, + MachineBasicBlock *&InsertBB, + ReachingDefAnalysis &RDA) { // We can define LR because LR already contains the same value. - if (Start->getOperand(0).getReg() == ARM::LR) - return Start; + if (Start->getOperand(0).getReg() == ARM::LR) { + InsertPt = MachineBasicBlock::iterator(Start); + InsertBB = Start->getParent(); + return true; + } unsigned CountReg = Start->getOperand(0).getReg(); auto IsMoveLR = [&CountReg](MachineInstr *MI) { @@ -1035,32 +1038,41 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { // Find an insertion point: // - Is there a (mov lr, Count) before Start? If so, and nothing else // writes to Count before Start, we can insert at that mov. - if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) - if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) - return LRDef; + if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) { + if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) { + InsertPt = MachineBasicBlock::iterator(LRDef); + InsertBB = LRDef->getParent(); + return true; + } + } // - Is there a (mov lr, Count) after Start? If so, and nothing else writes // to Count after Start, we can insert at that mov. - if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) - if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) - return LRDef; + if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) { + if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) { + InsertPt = MachineBasicBlock::iterator(LRDef); + InsertBB = LRDef->getParent(); + return true; + } + } // We've found no suitable LR def and Start doesn't use LR directly. Can we // just define LR anyway? - return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr; - }; + if (!RDA.isSafeToDefRegAt(Start, ARM::LR)) + return false; - InsertPt = FindStartInsertionPoint(Start, RDA); - Revert = !ValidateRanges(Start, End, BBUtils, ML) || !InsertPt; - CannotTailPredicate = !ValidateTailPredicate(InsertPt); + InsertPt = MachineBasicBlock::iterator(Start); + InsertBB = Start->getParent(); + return true; + }; - LLVM_DEBUG(if (!InsertPt) - dbgs() << "ARM Loops: Unable to find safe insertion point.\n"; - else - dbgs() << "ARM Loops: Start insertion point: " << *InsertPt; - if (CannotTailPredicate) - dbgs() << "ARM Loops: Couldn't validate tail predicate.\n" - ); + if (!FindStartInsertionPoint(Start, StartInsertPt, StartInsertBB, RDA)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); + Revert = true; + return; + } + Revert = !ValidateRanges(Start, End, BBUtils, ML); + CannotTailPredicate = !ValidateTailPredicate(); } bool LowOverheadLoop::AddVCTP(MachineInstr *MI) { @@ -1398,7 +1410,10 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { // Collect and remove the users of iteration count. SmallPtrSet Killed = { LoLoop.Start, LoLoop.Dec, - LoLoop.End, LoLoop.InsertPt }; + LoLoop.End }; + if (LoLoop.StartInsertPt != LoLoop.StartInsertBB->end()) + Killed.insert(&*LoLoop.StartInsertPt); + if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); } @@ -1409,15 +1424,15 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { // calculate the number of loop iterations. IterationCountDCE(LoLoop); - MachineInstr *InsertPt = LoLoop.InsertPt; + MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt; MachineInstr *Start = LoLoop.Start; - MachineBasicBlock *MBB = InsertPt->getParent(); + MachineBasicBlock *MBB = LoLoop.StartInsertBB; bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; unsigned Opc = LoLoop.getStartOpcode(); MachineOperand &Count = LoLoop.getLoopStartOperand(); MachineInstrBuilder MIB = - BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); + BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); MIB.add(Count); @@ -1425,8 +1440,8 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { MIB.add(Start->getOperand(1)); // If we're inserting at a mov lr, then remove it as it's redundant. - if (InsertPt != Start) - LoLoop.ToRemove.insert(InsertPt); + if (InsertPt != MBB->end()) + LoLoop.ToRemove.insert(&*InsertPt); LoLoop.ToRemove.insert(Start); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); return &*MIB; diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir index d66cbf90efe7e..e3eb367f68de2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir @@ -249,7 +249,7 @@ body: | ; CHECK: renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (load 4 from %ir.arrayidx7.us) ; CHECK: $r3 = tMOVr $r5, 14 /* CC::al */, $noreg, debug-location !32 ; CHECK: $r0 = tMOVr $r8, 14 /* CC::al */, $noreg, debug-location !32 - ; CHECK: $lr = t2DLS renamable $r10, debug-location !32 + ; CHECK: $lr = t2DLS renamable $r10, debug-location !42 ; CHECK: bb.3.for.body3.us: ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r8, $r9, $r10, $r12 From 456974ac78f107d74b6db35401aff5ac4ab2665d Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 1 Oct 2020 00:49:09 -0700 Subject: [PATCH 247/544] [sanitizer] Fix SymbolizedStack leak --- .../lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp index 4ef305cf17991..68bd0bb296292 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp @@ -134,6 +134,7 @@ void __sanitizer_symbolize_pc(uptr pc, const char *fmt, char *out_buf, } CHECK(out_buf <= out_end); *out_buf = 0; + frame->ClearAll(); } SANITIZER_INTERFACE_ATTRIBUTE From e5795a1b364d6d19cd557a3a6d38759eb9d8631f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 1 Oct 2020 10:03:58 +0200 Subject: [PATCH 248/544] [ORC][examples] Remove ThinLtoJIT example after LLJITWithThinLTOSummaries landed in OrcV2Examples The ThinLtoJIT example was aiming to utilize ThinLTO summaries and concurrency in ORC for speculative compilation. The latter is heavily dependent on asynchronous task scheduling which is probably done better out-of-tree with a mature library like Boost-ASIO. The pure utilization of ThinLTO summaries in ORC is demonstrated in OrcV2Examples/LLJITWithThinLTOSummaries. --- llvm/examples/CMakeLists.txt | 1 - llvm/examples/ThinLtoJIT/CMakeLists.txt | 19 - .../ThinLtoJIT/ThinLtoDiscoveryThread.cpp | 65 ---- .../ThinLtoJIT/ThinLtoDiscoveryThread.h | 57 --- .../ThinLtoInstrumentationLayer.cpp | 225 ------------ .../ThinLtoJIT/ThinLtoInstrumentationLayer.h | 78 ---- llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp | 339 ------------------ llvm/examples/ThinLtoJIT/ThinLtoJIT.h | 111 ------ .../ThinLtoJIT/ThinLtoModuleIndex.cpp | 268 -------------- llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h | 94 ----- llvm/examples/ThinLtoJIT/bench | 100 ------ llvm/examples/ThinLtoJIT/main.cpp | 83 ----- 12 files changed, 1440 deletions(-) delete mode 100644 llvm/examples/ThinLtoJIT/CMakeLists.txt delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoJIT.h delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp delete mode 100644 llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h delete mode 100755 llvm/examples/ThinLtoJIT/bench delete mode 100644 llvm/examples/ThinLtoJIT/main.cpp diff --git a/llvm/examples/CMakeLists.txt b/llvm/examples/CMakeLists.txt index 6d926d0bfba26..74613bd1350bd 100644 --- a/llvm/examples/CMakeLists.txt +++ b/llvm/examples/CMakeLists.txt @@ -8,7 +8,6 @@ add_subdirectory(ModuleMaker) add_subdirectory(OrcV2Examples) add_subdirectory(SpeculativeJIT) add_subdirectory(Bye) -add_subdirectory(ThinLtoJIT) if(LLVM_ENABLE_EH AND (NOT WIN32) AND (NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM")) add_subdirectory(ExceptionDemo) diff --git a/llvm/examples/ThinLtoJIT/CMakeLists.txt b/llvm/examples/ThinLtoJIT/CMakeLists.txt deleted file mode 100644 index c2b52dc815f2a..0000000000000 --- a/llvm/examples/ThinLtoJIT/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -set(LLVM_LINK_COMPONENTS - BitReader - Core - IRReader - OrcJIT - ExecutionEngine - Support - nativecodegen - Analysis - Passes - ) - -add_llvm_example(ThinLtoJIT - main.cpp - ThinLtoJIT.cpp - ThinLtoModuleIndex.cpp - ThinLtoInstrumentationLayer.cpp - ThinLtoDiscoveryThread.cpp - ) diff --git a/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp b/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp deleted file mode 100644 index 203532436ab7b..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "ThinLtoDiscoveryThread.h" - -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" - -#include "ThinLtoInstrumentationLayer.h" -#include "ThinLtoModuleIndex.h" - -#include - -#define DEBUG_TYPE "thinltojit" - -namespace llvm { -namespace orc { - -void ThinLtoDiscoveryThread::operator()() { - while (KeepRunning.load()) { - std::vector Indexes = Layer.takeFlagsThatFired(); - - if (!Indexes.empty()) { - LLVM_DEBUG(dbgs() << Indexes.size() << " new flags raised\n"); - auto ReachedFunctions = Layer.takeFlagOwners(std::move(Indexes)); - - for (GlobalValue::GUID F : ReachedFunctions) { - if (GlobalValueSummary *S = GlobalIndex.getSummary(F)) { - assert(isa(S) && "Reached symbols are functions"); - GlobalIndex.discoverCalleeModulePaths(cast(S), - LookaheadLevels); - } else { - LLVM_DEBUG(dbgs() << "No summary for GUID: " << F << "\n"); - } - } - - if (GlobalIndex.getNumDiscoveredModules() > 0) - spawnLookupForHighRankModules(); - } - } -} - -void ThinLtoDiscoveryThread::spawnLookupForHighRankModules() { - std::vector Paths = GlobalIndex.selectNextPaths(); - GlobalIndex.scheduleModuleParsing(Paths); - - // In order to add modules we need exclusive access to the execution session. - std::thread([this, Paths = std::move(Paths)]() { - ES.runSessionLocked([this, Paths = std::move(Paths)]() mutable { - for (const std::string &Path : Paths) { - ThreadSafeModule TSM = GlobalIndex.takeModule(Path); - if (!TSM) - // In the meantime the module was added synchronously. - continue; - - if (Error LoadErr = AddModule(std::move(TSM))) - // Failed to add the module to the session. - ES.reportError(std::move(LoadErr)); - - ++NumModulesSubmitted; - } - }); - }).detach(); -} - -} // namespace orc -} // namespace llvm diff --git a/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h b/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h deleted file mode 100644 index 4ca3c95dee001..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef LLVM_EXAMPLES_THINLTOJIT_DISCOVERYTHREAD_H -#define LLVM_EXAMPLES_THINLTOJIT_DISCOVERYTHREAD_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/ModuleSummaryIndex.h" - -#include "ThinLtoJIT.h" - -#include -#include - -namespace llvm { -namespace orc { - -class ExecutionSession; -class ThinLtoModuleIndex; -class ThinLtoInstrumentationLayer; - -class ThinLtoDiscoveryThread { -public: - ThinLtoDiscoveryThread(std::atomic &RunningFlag, ExecutionSession &ES, - JITDylib *MainJD, ThinLtoInstrumentationLayer &L, - ThinLtoModuleIndex &GlobalIndex, - ThinLtoJIT::AddModuleFunction AddModule, - unsigned LookaheadLevels, bool PrintStats) - : KeepRunning(RunningFlag), ES(ES), Layer(L), GlobalIndex(GlobalIndex), - AddModule(std::move(AddModule)), LookaheadLevels(LookaheadLevels), - PrintStats(PrintStats) {} - - ~ThinLtoDiscoveryThread() { - if (PrintStats) - dump(errs()); - } - - void operator()(); - - void dump(raw_ostream &OS) { - OS << format("Modules submitted asynchronously: %d\n", NumModulesSubmitted); - } - -private: - std::atomic &KeepRunning; - ExecutionSession &ES; - ThinLtoInstrumentationLayer &Layer; - ThinLtoModuleIndex &GlobalIndex; - ThinLtoJIT::AddModuleFunction AddModule; - unsigned LookaheadLevels; - bool PrintStats; - unsigned NumModulesSubmitted{0}; - - void spawnLookupForHighRankModules(); -}; - -} // namespace orc -} // namespace llvm - -#endif diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp deleted file mode 100644 index df844bf19b9cc..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp +++ /dev/null @@ -1,225 +0,0 @@ -#include "ThinLtoInstrumentationLayer.h" - -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Process.h" - -#include - -#define DEBUG_TYPE "thinltojit" - -namespace llvm { -namespace orc { - -// TODO: Fixed set of flags may not always be enough. Make this expandable. -void ThinLtoInstrumentationLayer::allocateDiscoveryFlags(unsigned MinFlags) { - // Round up to full memory pages. - unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPagesEach = (MinFlags + (PageSize - 1)) / PageSize; - unsigned NumPagesTotal = 2 * NumPagesEach; - assert(isPowerOf2_64(PageSize) && "Adjust aligned memory alloc below"); - - // Allocate one more page to make up for size loss due to alignment. - void *Storage = std::calloc(NumPagesTotal + 1, PageSize); - uint64_t StorageAddr = reinterpret_cast(Storage); - uint64_t PageSizeDecr = PageSize - 1; - uint64_t AlignedAddr = ((StorageAddr + PageSizeDecr) & ~PageSizeDecr); - uint64_t Diff = AlignedAddr - StorageAddr; - - // For each flag we allocate one byte in each location: Incoming and Handled. - // TODO: 'Handled' could be a bitset, but size must be dynamic - NumFlagsUsed.store(0); - NumFlagsAllocated = NumPagesEach * PageSize; - FlagsStorage = static_cast(Storage); - FlagsIncoming = reinterpret_cast(FlagsStorage + Diff); - FlagsHandled = FlagsIncoming + NumFlagsAllocated; - - static_assert(sizeof(FlagsIncoming[0]) == sizeof(uint8_t), "Flags are bytes"); - assert(reinterpret_cast(FlagsIncoming) % PageSize == 0); - assert(reinterpret_cast(FlagsHandled) % PageSize == 0); - assert(NumFlagsAllocated >= MinFlags); -} - -// Reserve a new set of discovery flags and return the index of the first one. -unsigned ThinLtoInstrumentationLayer::reserveDiscoveryFlags(unsigned Count) { -#ifndef NDEBUG - for (unsigned i = NumFlagsUsed.load(), e = i + Count; i < e; i++) { - assert(FlagsIncoming[i] == Clear); - } -#endif - - assert(Count > 0); - return NumFlagsUsed.fetch_add(Count); -} - -void ThinLtoInstrumentationLayer::registerDiscoveryFlagOwners( - std::vector Guids, unsigned FirstIdx) { - unsigned Count = Guids.size(); - - std::lock_guard Lock(DiscoveryFlagsInfoLock); - for (unsigned i = 0; i < Count; i++) { - assert(!FlagOwnersMap.count(FirstIdx + i) && - "Flag should not have an owner at this point"); - FlagOwnersMap[FirstIdx + i] = Guids[i]; - } -} - -std::vector ThinLtoInstrumentationLayer::takeFlagsThatFired() { - // This is only effective with the respective Release. - FlagsSync.load(std::memory_order_acquire); - - std::vector Indexes; - unsigned NumIndexesUsed = NumFlagsUsed.load(); - for (unsigned i = 0; i < NumIndexesUsed; i++) { - if (FlagsIncoming[i] == Fired && FlagsHandled[i] == Clear) { - FlagsHandled[i] = Fired; - Indexes.push_back(i); - } - } - - return Indexes; -} - -std::vector -ThinLtoInstrumentationLayer::takeFlagOwners(std::vector Indexes) { - std::vector ReachedFunctions; - std::lock_guard Lock(DiscoveryFlagsInfoLock); - - for (unsigned i : Indexes) { - auto KV = FlagOwnersMap.find(i); - assert(KV != FlagOwnersMap.end()); - ReachedFunctions.push_back(KV->second); - FlagOwnersMap.erase(KV); - } - - return ReachedFunctions; -} - -void ThinLtoInstrumentationLayer::nudgeIntoDiscovery( - std::vector Functions) { - unsigned Count = Functions.size(); - - // Registering synthetic flags in advance. We expect them to get processed - // before the respective functions get emitted. If not, the emit() function - unsigned FirstFlagIdx = reserveDiscoveryFlags(Functions.size()); - registerDiscoveryFlagOwners(std::move(Functions), FirstFlagIdx); - - // Initialize the flags as fired and force a cache sync, so discovery will - // pick them up as soon as possible. - for (unsigned i = FirstFlagIdx; i < FirstFlagIdx + Count; i++) { - FlagsIncoming[i] = Fired; - } - if (MemFence & ThinLtoJIT::FenceStaticCode) { - FlagsSync.store(0, std::memory_order_release); - } - - LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n"); -} - -void ThinLtoInstrumentationLayer::emit( - std::unique_ptr R, ThreadSafeModule TSM) { - TSM.withModuleDo([this](Module &M) { - std::vector FunctionsToInstrument; - - // We may have discovered ahead of some functions already, but we still - // instrument them all. Their notifications steer the future direction of - // discovery. - for (Function &F : M.getFunctionList()) - if (!F.isDeclaration()) - FunctionsToInstrument.push_back(&F); - - if (!FunctionsToInstrument.empty()) { - IRBuilder<> B(M.getContext()); - std::vector NewDiscoveryRoots; - - // Flags that fire must have owners registered. We will do it below and - // that's fine, because they can only be reached once the code is emitted. - unsigned FirstFlagIdx = - reserveDiscoveryFlags(FunctionsToInstrument.size()); - - unsigned NextFlagIdx = FirstFlagIdx; - for (Function *F : FunctionsToInstrument) { - // TODO: Emitting the write operation into an indirection stub would - // allow to skip it once we got the notification. - BasicBlock *E = &F->getEntryBlock(); - B.SetInsertPoint(BasicBlock::Create( - M.getContext(), "NotifyFunctionReachedProlog", F, E)); - compileFunctionReachedFlagSetter(B, FlagsIncoming + NextFlagIdx); - B.CreateBr(E); - - std::string GlobalName = GlobalValue::getGlobalIdentifier( - F->getName(), F->getLinkage(), M.getSourceFileName()); - NewDiscoveryRoots.push_back(GlobalValue::getGUID(GlobalName)); - ++NextFlagIdx; - } - - LLVM_DEBUG(dbgs() << "Instrumented " << NewDiscoveryRoots.size() - << " new functions in module " << M.getName() << "\n"); - - // Submit owner info, so the DiscoveryThread can evaluate the flags. - registerDiscoveryFlagOwners(std::move(NewDiscoveryRoots), FirstFlagIdx); - } - }); - - BaseLayer.emit(std::move(R), std::move(TSM)); -} - -void ThinLtoInstrumentationLayer::compileFunctionReachedFlagSetter( - IRBuilder<> &B, Flag *F) { - assert(*F == Clear); - Type *Int64Ty = Type::getInt64Ty(B.getContext()); - - // Write one immediate 8bit value to a fixed location in memory. - auto FlagAddr = pointerToJITTargetAddress(F); - Type *FlagTy = Type::getInt8Ty(B.getContext()); - B.CreateStore(ConstantInt::get(FlagTy, Fired), - B.CreateIntToPtr(ConstantInt::get(Int64Ty, FlagAddr), - FlagTy->getPointerTo())); - - if (MemFence & ThinLtoJIT::FenceJITedCode) { - // Overwrite the sync value with Release ordering. The discovery thread - // reads it with Acquire ordering. The actual value doesn't matter. - static constexpr bool IsVolatile = true; - static constexpr Instruction *NoInsertBefore = nullptr; - auto SyncFlagAddr = pointerToJITTargetAddress(&FlagsSync); - - B.Insert( - new StoreInst(ConstantInt::get(Int64Ty, 0), - B.CreateIntToPtr(ConstantInt::get(Int64Ty, SyncFlagAddr), - Int64Ty->getPointerTo()), - IsVolatile, Align(64), AtomicOrdering::Release, - SyncScope::System, NoInsertBefore)); - } -} - -void ThinLtoInstrumentationLayer::dump(raw_ostream &OS) { - OS << "Discovery flags stats\n"; - - unsigned NumFlagsFired = 0; - for (unsigned i = 0; i < NumFlagsAllocated; i++) { - if (FlagsIncoming[i] == Fired) - ++NumFlagsFired; - } - OS << "Alloc: " << format("%6.d", NumFlagsAllocated) << "\n"; - OS << "Issued: " << format("%6.d", NumFlagsUsed.load()) << "\n"; - OS << "Fired: " << format("%6.d", NumFlagsFired) << "\n"; - - unsigned RemainingFlagOwners = 0; - for (const auto &_ : FlagOwnersMap) { - ++RemainingFlagOwners; - (void)_; - } - OS << "\nFlagOwnersMap has " << RemainingFlagOwners - << " remaining entries.\n"; -} - -ThinLtoInstrumentationLayer::~ThinLtoInstrumentationLayer() { - std::free(FlagsStorage); -} - -} // namespace orc -} // namespace llvm diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h deleted file mode 100644 index 25006b40607fe..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef LLVM_EXAMPLES_THINLTOJIT_DISCOVERYLAYER_H -#define LLVM_EXAMPLES_THINLTOJIT_DISCOVERYLAYER_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/Core.h" -#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" -#include "llvm/ExecutionEngine/Orc/Layer.h" -#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" - -#include "ThinLtoJIT.h" - -#include -#include -#include -#include -#include - -namespace llvm { -namespace orc { - -class ThinLtoInstrumentationLayer : public IRLayer { -public: - ThinLtoInstrumentationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer, - ThinLtoJIT::ExplicitMemoryBarrier MemFence, - unsigned FlagsPerBucket) - : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer), - MemFence(MemFence) { - // TODO: So far we only allocate one bucket. - allocateDiscoveryFlags(FlagsPerBucket); - } - - ~ThinLtoInstrumentationLayer() override; - - void emit(std::unique_ptr R, - ThreadSafeModule TSM) override; - - unsigned reserveDiscoveryFlags(unsigned Count); - void registerDiscoveryFlagOwners(std::vector Guids, - unsigned FirstIdx); - - void nudgeIntoDiscovery(std::vector Functions); - - std::vector takeFlagsThatFired(); - std::vector takeFlagOwners(std::vector Indexes); - - void dump(raw_ostream &OS); - -private: - IRCompileLayer &BaseLayer; - ThinLtoJIT::ExplicitMemoryBarrier MemFence; - - enum Flag : uint8_t { Clear = 0, Fired = 1 }; - - // Lock-free read access. - uint8_t *FlagsStorage; - Flag *FlagsIncoming; // lock-free write by design - Flag *FlagsHandled; - unsigned NumFlagsAllocated; - std::atomic NumFlagsUsed; // spin-lock - - // Acquire/release sync between writers and reader - std::atomic FlagsSync; - - // STL container requires locking for both, read and write access. - mutable std::mutex DiscoveryFlagsInfoLock; - std::map FlagOwnersMap; - - void allocateDiscoveryFlags(unsigned MinFlags); - void compileFunctionReachedFlagSetter(IRBuilder<> &B, Flag *F); -}; - -} // namespace orc -} // namespace llvm - -#endif diff --git a/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp b/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp deleted file mode 100644 index e668be7d11b7e..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp +++ /dev/null @@ -1,339 +0,0 @@ -#include "ThinLtoJIT.h" - -#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" -#include "llvm/ExecutionEngine/Orc/CompileUtils.h" -#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" -#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" -#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Host.h" - -#include "ThinLtoDiscoveryThread.h" -#include "ThinLtoInstrumentationLayer.h" -#include "ThinLtoModuleIndex.h" - -#include -#include -#include - -#ifndef NDEBUG -#include -#endif - -#define DEBUG_TYPE "thinltojit" - -namespace llvm { -namespace orc { - -class ThinLtoDefinitionGenerator : public JITDylib::DefinitionGenerator { -public: - ThinLtoDefinitionGenerator(ThinLtoModuleIndex &GlobalIndex, - ThinLtoInstrumentationLayer &InstrumentationLayer, - ThinLtoJIT::AddModuleFunction AddModule, - char Prefix, bool AllowNudge, bool PrintStats) - : GlobalIndex(GlobalIndex), InstrumentationLayer(InstrumentationLayer), - AddModule(std::move(AddModule)), ManglePrefix(Prefix), - AllowNudgeIntoDiscovery(AllowNudge), PrintStats(PrintStats) {} - - ~ThinLtoDefinitionGenerator() { - if (PrintStats) - dump(errs()); - } - - Error tryToGenerate(LookupKind K, JITDylib &JD, - JITDylibLookupFlags JDLookupFlags, - const SymbolLookupSet &Symbols) override; - - void dump(raw_ostream &OS) { - OS << format("Modules submitted synchronously: %d\n", NumModulesMissed); - } - -private: - ThinLtoModuleIndex &GlobalIndex; - ThinLtoInstrumentationLayer &InstrumentationLayer; - ThinLtoJIT::AddModuleFunction AddModule; - char ManglePrefix; - bool AllowNudgeIntoDiscovery; - bool PrintStats; - unsigned NumModulesMissed{0}; - - // ThinLTO summaries encode unprefixed names. - StringRef stripGlobalManglePrefix(StringRef Symbol) const { - bool Strip = (ManglePrefix != '\0' && Symbol[0] == ManglePrefix); - return Strip ? StringRef(Symbol.data() + 1, Symbol.size() - 1) : Symbol; - } -}; - -Error ThinLtoDefinitionGenerator::tryToGenerate( - LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, - const SymbolLookupSet &Symbols) { - std::set ModulePaths; - std::vector NewDiscoveryRoots; - - for (const auto &KV : Symbols) { - StringRef UnmangledName = stripGlobalManglePrefix(*KV.first); - auto Guid = GlobalValue::getGUID(UnmangledName); - if (GlobalValueSummary *S = GlobalIndex.getSummary(Guid)) { - // We could have discovered it ahead of time. - LLVM_DEBUG(dbgs() << format("Failed to discover symbol: %s\n", - UnmangledName.str().c_str())); - ModulePaths.insert(S->modulePath()); - if (AllowNudgeIntoDiscovery && isa(S)) { - NewDiscoveryRoots.push_back(Guid); - } - } - } - - NumModulesMissed += ModulePaths.size(); - - // Parse the requested modules if it hasn't happened yet. - GlobalIndex.scheduleModuleParsing(ModulePaths); - - for (StringRef Path : ModulePaths) { - ThreadSafeModule TSM = GlobalIndex.takeModule(Path); - assert(TSM && "We own the session lock, no asynchronous access possible"); - - if (Error LoadErr = AddModule(std::move(TSM))) - // Failed to add the module to the session. - return LoadErr; - - LLVM_DEBUG(dbgs() << "Generator: added " << Path << " synchronously\n"); - } - - // Requested functions that we failed to discover ahead of time, are likely - // close to the execution front. We can anticipate to run into them as soon - // as execution continues and trigger their discovery flags already now. This - // behavior is enabled with the 'allow-nudge' option and implemented below. - // On the one hand, it may give us a head start in a moment where discovery - // was lacking behind. On the other hand, we may bet on the wrong horse and - // waste extra time speculating in the wrong direction. - if (!NewDiscoveryRoots.empty()) { - assert(AllowNudgeIntoDiscovery); - InstrumentationLayer.nudgeIntoDiscovery(std::move(NewDiscoveryRoots)); - } - - return Error::success(); -} - -ThinLtoJIT::ThinLtoJIT(ArrayRef InputFiles, - StringRef MainFunctionName, unsigned LookaheadLevels, - unsigned NumCompileThreads, unsigned NumLoadThreads, - unsigned DiscoveryFlagsPerBucket, - ExplicitMemoryBarrier MemFence, - bool AllowNudgeIntoDiscovery, bool PrintStats, - Error &Err) { - ErrorAsOutParameter ErrAsOutParam(&Err); - - // Populate the module index, so we know which modules exist and we can find - // the one that defines the main function. - GlobalIndex = std::make_unique(ES, NumLoadThreads); - for (StringRef F : InputFiles) { - if (auto Err = GlobalIndex->add(F)) - ES.reportError(std::move(Err)); - } - - // Load the module that defines the main function. - auto TSM = setupMainModule(MainFunctionName); - if (!TSM) { - Err = TSM.takeError(); - return; - } - - // Infer target-specific utils from the main module. - ThreadSafeModule MainModule = std::move(*TSM); - auto JTMB = setupTargetUtils(MainModule.getModuleUnlocked()); - if (!JTMB) { - Err = JTMB.takeError(); - return; - } - - // Set up the JIT compile pipeline. - setupLayers(std::move(*JTMB), NumCompileThreads, DiscoveryFlagsPerBucket, - MemFence); - - // We can use the mangler now. Remember the mangled name of the main function. - MainFunctionMangled = (*Mangle)(MainFunctionName); - - // We are restricted to a single dylib currently. Add runtime overrides and - // symbol generators. - MainJD = &ES.createBareJITDylib("main"); - Err = setupJITDylib(MainJD, AllowNudgeIntoDiscovery, PrintStats); - if (Err) - return; - - // Spawn discovery thread and let it add newly discovered modules to the JIT. - setupDiscovery(MainJD, LookaheadLevels, PrintStats); - - Err = AddModule(std::move(MainModule)); - if (Err) - return; - - if (AllowNudgeIntoDiscovery) { - auto MainFunctionGuid = GlobalValue::getGUID(MainFunctionName); - InstrumentationLayer->nudgeIntoDiscovery({MainFunctionGuid}); - } -} - -Expected ThinLtoJIT::setupMainModule(StringRef MainFunction) { - Optional M = GlobalIndex->getModulePathForSymbol(MainFunction); - if (!M) { - std::string Buffer; - raw_string_ostream OS(Buffer); - OS << "No ValueInfo for symbol '" << MainFunction; - OS << "' in provided modules: "; - for (StringRef P : GlobalIndex->getAllModulePaths()) - OS << P << " "; - OS << "\n"; - return createStringError(inconvertibleErrorCode(), OS.str()); - } - - if (auto TSM = GlobalIndex->parseModuleFromFile(*M)) - return std::move(TSM); // Not a redundant move: fix build on gcc-7.5 - - return createStringError(inconvertibleErrorCode(), - "Failed to parse main module"); -} - -Expected ThinLtoJIT::setupTargetUtils(Module *M) { - std::string T = M->getTargetTriple(); - JITTargetMachineBuilder JTMB(Triple(T.empty() ? sys::getProcessTriple() : T)); - - // CallThroughManager is ABI-specific - auto LCTM = createLocalLazyCallThroughManager( - JTMB.getTargetTriple(), ES, - pointerToJITTargetAddress(exitOnLazyCallThroughFailure)); - if (!LCTM) - return LCTM.takeError(); - CallThroughManager = std::move(*LCTM); - - // Use DataLayout or the given module or fall back to the host's default. - DL = DataLayout(M); - if (DL.getStringRepresentation().empty()) { - auto HostDL = JTMB.getDefaultDataLayoutForTarget(); - if (!HostDL) - return HostDL.takeError(); - DL = std::move(*HostDL); - if (Error Err = applyDataLayout(M)) - return std::move(Err); - } - - // Now that we know the target data layout we can setup the mangler. - Mangle = std::make_unique(ES, DL); - return JTMB; -} - -Error ThinLtoJIT::applyDataLayout(Module *M) { - if (M->getDataLayout().isDefault()) - M->setDataLayout(DL); - - if (M->getDataLayout() != DL) - return make_error( - "Added modules have incompatible data layouts", - inconvertibleErrorCode()); - - return Error::success(); -} - -static bool IsTrivialModule(MaterializationUnit *MU) { - StringRef ModuleName = MU->getName(); - return ModuleName == "" || ModuleName == "" || - ModuleName == ""; -} - -void ThinLtoJIT::setupLayers(JITTargetMachineBuilder JTMB, - unsigned NumCompileThreads, - unsigned DiscoveryFlagsPerBucket, - ExplicitMemoryBarrier MemFence) { - ObjLinkingLayer = std::make_unique( - ES, []() { return std::make_unique(); }); - - CompileLayer = std::make_unique( - ES, *ObjLinkingLayer, std::make_unique(JTMB)); - - InstrumentationLayer = std::make_unique( - ES, *CompileLayer, MemFence, DiscoveryFlagsPerBucket); - - OnDemandLayer = std::make_unique( - ES, *InstrumentationLayer, *CallThroughManager, - createLocalIndirectStubsManagerBuilder(JTMB.getTargetTriple())); - // Don't break up modules. Insert stubs on module boundaries. - OnDemandLayer->setPartitionFunction(CompileOnDemandLayer::compileWholeModule); - - // Delegate compilation to the thread pool. - CompileThreads = std::make_unique( - llvm::hardware_concurrency(NumCompileThreads)); - ES.setDispatchMaterialization( - [this](std::unique_ptr MU, - std::unique_ptr MR) { - if (IsTrivialModule(MU.get())) { - // This should be quick and we may save a few session locks. - MU->materialize(std::move(MR)); - } else { - // FIXME: Drop the std::shared_ptr workaround once ThreadPool::async() - // accepts llvm::unique_function to define jobs. - CompileThreads->async( - [UnownedMU = MU.release(), UnownedMR = MR.release()]() { - std::unique_ptr MU(UnownedMU); - std::unique_ptr MR(UnownedMR); - MU->materialize(std::move(MR)); - }); - } - }); - - AddModule = [this](ThreadSafeModule TSM) -> Error { - assert(MainJD && "Setup MainJD JITDylib before calling"); - Module *M = TSM.getModuleUnlocked(); - if (Error Err = applyDataLayout(M)) - return Err; - VModuleKey Id = GlobalIndex->getModuleId(M->getName()); - return OnDemandLayer->add(*MainJD, std::move(TSM), Id); - }; -} - -void ThinLtoJIT::setupDiscovery(JITDylib *MainJD, unsigned LookaheadLevels, - bool PrintStats) { - JitRunning.store(true); - DiscoveryThreadWorker = std::make_unique( - JitRunning, ES, MainJD, *InstrumentationLayer, *GlobalIndex, AddModule, - LookaheadLevels, PrintStats); - - DiscoveryThread = std::thread(std::ref(*DiscoveryThreadWorker)); -} - -Error ThinLtoJIT::setupJITDylib(JITDylib *JD, bool AllowNudge, - bool PrintStats) { - // Register symbols for C++ static destructors. - LocalCXXRuntimeOverrides CXXRuntimeoverrides; - Error Err = CXXRuntimeoverrides.enable(*JD, *Mangle); - if (Err) - return Err; - - // Lookup symbol names in the global ThinLTO module index first - char Prefix = DL.getGlobalPrefix(); - JD->addGenerator(std::make_unique( - *GlobalIndex, *InstrumentationLayer, AddModule, Prefix, AllowNudge, - PrintStats)); - - // Then try lookup in the host process. - auto HostLookup = DynamicLibrarySearchGenerator::GetForCurrentProcess(Prefix); - if (!HostLookup) - return HostLookup.takeError(); - JD->addGenerator(std::move(*HostLookup)); - - return Error::success(); -} - -ThinLtoJIT::~ThinLtoJIT() { - // Signal the DiscoveryThread to shut down. - JitRunning.store(false); - DiscoveryThread.join(); - - // Wait for potential compile actions to finish. - CompileThreads->wait(); -} - -} // namespace orc -} // namespace llvm diff --git a/llvm/examples/ThinLtoJIT/ThinLtoJIT.h b/llvm/examples/ThinLtoJIT/ThinLtoJIT.h deleted file mode 100644 index 4c2fddfd577a9..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoJIT.h +++ /dev/null @@ -1,111 +0,0 @@ -#ifndef LLVM_EXAMPLES_THINLTOJIT_THINLTOJIT_H -#define LLVM_EXAMPLES_THINLTOJIT_THINLTOJIT_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/Orc/Core.h" -#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" -#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" -#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ThreadPool.h" - -#include -#include -#include -#include -#include - -namespace llvm { -namespace orc { - -class ThinLtoDiscoveryThread; -class ThinLtoInstrumentationLayer; -class ThinLtoModuleIndex; - -class CompileOnDemandLayer; -class IRCompileLayer; -class RTDyldObjectLinkingLayer; - -class JITDylib; -class JITTargetMachineBuilder; -class LazyCallThroughManager; -class MangleAndInterner; - -class ThinLtoJIT { -public: - using AddModuleFunction = std::function; - - enum ExplicitMemoryBarrier { - NeverFence = 0, - FenceStaticCode = 1, - FenceJITedCode = 2, - AlwaysFence = 3 - }; - - ThinLtoJIT(ArrayRef InputFiles, StringRef MainFunctionName, - unsigned LookaheadLevels, unsigned NumCompileThreads, - unsigned NumLoadThreads, unsigned DiscoveryFlagsPerBucket, - ExplicitMemoryBarrier MemFence, bool AllowNudgeIntoDiscovery, - bool PrintStats, Error &Err); - ~ThinLtoJIT(); - - ThinLtoJIT(const ThinLtoJIT &) = delete; - ThinLtoJIT &operator=(const ThinLtoJIT &) = delete; - ThinLtoJIT(ThinLtoJIT &&) = delete; - ThinLtoJIT &operator=(ThinLtoJIT &&) = delete; - - Expected main(ArrayRef Args) { - auto MainSym = ES.lookup({MainJD}, MainFunctionMangled); - if (!MainSym) - return MainSym.takeError(); - - using MainFn = int(int, char *[]); - auto Main = jitTargetAddressToFunction(MainSym->getAddress()); - - return runAsMain(Main, Args, StringRef("ThinLtoJIT")); - } - -private: - ExecutionSession ES; - DataLayout DL{""}; - - JITDylib *MainJD; - SymbolStringPtr MainFunctionMangled; - std::unique_ptr CompileThreads; - std::unique_ptr GlobalIndex; - - AddModuleFunction AddModule; - std::unique_ptr ObjLinkingLayer; - std::unique_ptr CompileLayer; - std::unique_ptr InstrumentationLayer; - std::unique_ptr OnDemandLayer; - - std::atomic JitRunning; - std::thread DiscoveryThread; - std::unique_ptr DiscoveryThreadWorker; - - std::unique_ptr Mangle; - std::unique_ptr CallThroughManager; - - void setupLayers(JITTargetMachineBuilder JTMB, unsigned NumCompileThreads, - unsigned DiscoveryFlagsPerBucket, - ExplicitMemoryBarrier MemFence); - Error setupJITDylib(JITDylib *JD, bool AllowNudge, bool PrintStats); - void setupDiscovery(JITDylib *MainJD, unsigned LookaheadLevels, - bool PrintStats); - Expected setupMainModule(StringRef MainFunction); - Expected setupTargetUtils(Module *M); - Error applyDataLayout(Module *M); - - static void exitOnLazyCallThroughFailure() { - errs() << "Compilation failed. Aborting.\n"; - exit(1); - } -}; - -} // namespace orc -} // namespace llvm - -#endif diff --git a/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp b/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp deleted file mode 100644 index 42ee43f1091ba..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp +++ /dev/null @@ -1,268 +0,0 @@ -#include "ThinLtoModuleIndex.h" - -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" - -#include -#include - -#define DEBUG_TYPE "thinltojit" - -namespace llvm { -namespace orc { - -Error ThinLtoModuleIndex::add(StringRef InputPath) { - auto Buffer = errorOrToExpected(MemoryBuffer::getFile(InputPath)); - if (!Buffer) - return Buffer.takeError(); - - Error ParseErr = readModuleSummaryIndex((*Buffer)->getMemBufferRef(), - CombinedSummaryIndex, NextModuleId); - if (ParseErr) - return ParseErr; - -#ifndef NDEBUG - auto Paths = getAllModulePaths(); - unsigned TotalPaths = Paths.size(); - std::sort(Paths.begin(), Paths.end()); - Paths.erase(std::unique(Paths.begin(), Paths.end()), Paths.end()); - assert(TotalPaths == Paths.size() && "Module paths must be unique"); -#endif - - ++NextModuleId; - return Error::success(); -} - -std::vector ThinLtoModuleIndex::getAllModulePaths() const { - auto ModuleTable = CombinedSummaryIndex.modulePaths(); - - std::vector Paths; - Paths.resize(ModuleTable.size()); - - for (const auto &KV : ModuleTable) { - assert(Paths[KV.second.first].empty() && "IDs are unique and continuous"); - Paths[KV.second.first] = KV.first(); - } - - return Paths; -} - -GlobalValueSummary * -ThinLtoModuleIndex::getSummary(GlobalValue::GUID Function) const { - ValueInfo VI = CombinedSummaryIndex.getValueInfo(Function); - if (!VI || VI.getSummaryList().empty()) - return nullptr; - - // There can be more than one symbol with the same GUID, in the case of same- - // named locals in different but same-named source files that were compiled in - // their respective directories (so the source file name and resulting GUID is - // the same). We avoid this by checking that module paths are unique upon - // add(). - // - // TODO: We can still get duplicates on symbols declared with - // attribute((weak)), a GNU extension supported by gcc and clang. - // We should support it by looking for a symbol in the current module - // or in the same module as the caller. - assert(VI.getSummaryList().size() == 1 && "Weak symbols not yet supported"); - - return VI.getSummaryList().front().get()->getBaseObject(); -} - -Optional -ThinLtoModuleIndex::getModulePathForSymbol(StringRef Name) const { - if (GlobalValueSummary *S = getSummary(GlobalValue::getGUID(Name))) - return S->modulePath(); - return None; // We don't know the symbol. -} - -void ThinLtoModuleIndex::scheduleModuleParsingPrelocked(StringRef Path) { - // Once the module was scheduled, we can call takeModule(). - auto ScheduledIt = ScheduledModules.find(Path); - if (ScheduledIt != ScheduledModules.end()) - return; - - auto Worker = [this](std::string Path) { - if (auto TSM = doParseModule(Path)) { - std::lock_guard Lock(ParsedModulesLock); - ParsedModules[Path] = std::move(*TSM); - - LLVM_DEBUG(dbgs() << "Finished parsing module: " << Path << "\n"); - } else { - ES.reportError(TSM.takeError()); - } - }; - - LLVM_DEBUG(dbgs() << "Schedule module for parsing: " << Path << "\n"); - ScheduledModules[Path] = ParseModuleWorkers.async(Worker, Path.str()); -} - -ThreadSafeModule ThinLtoModuleIndex::takeModule(StringRef Path) { - std::unique_lock ParseLock(ParsedModulesLock); - - auto ParsedIt = ParsedModules.find(Path); - if (ParsedIt == ParsedModules.end()) { - ParseLock.unlock(); - - // The module is not ready, wait for the future we stored. - std::unique_lock ScheduleLock(ScheduledModulesLock); - auto ScheduledIt = ScheduledModules.find(Path); - assert(ScheduledIt != ScheduledModules.end() && - "Don't call for unscheduled modules"); - std::shared_future Future = ScheduledIt->getValue(); - ScheduleLock.unlock(); - Future.get(); - - ParseLock.lock(); - ParsedIt = ParsedModules.find(Path); - assert(ParsedIt != ParsedModules.end() && "Must be ready now"); - } - - // We only add each module once. If it's not here anymore, we can skip it. - ThreadSafeModule TSM = std::move(ParsedIt->getValue()); - ParsedIt->getValue() = ThreadSafeModule(); - return TSM; -} - -ThreadSafeModule ThinLtoModuleIndex::parseModuleFromFile(StringRef Path) { - { - std::lock_guard ScheduleLock(ScheduledModulesLock); - scheduleModuleParsingPrelocked(Path); - } - return takeModule(Path); -} - -Expected ThinLtoModuleIndex::doParseModule(StringRef Path) { - // TODO: make a SMDiagnosticError class for this - SMDiagnostic Err; - auto Ctx = std::make_unique(); - auto M = parseIRFile(Path, Err, *Ctx); - if (!M) { - std::string ErrDescription; - { - raw_string_ostream S(ErrDescription); - Err.print("ThinLtoJIT", S); - } - return createStringError(inconvertibleErrorCode(), - "Failed to load module from file '%s' (%s)", - Path.data(), ErrDescription.c_str()); - } - - return ThreadSafeModule(std::move(M), std::move(Ctx)); -} - -// We don't filter visited functions. Discovery will often be retriggered -// from the middle of already visited functions and it aims to reach a little -// further each time. -void ThinLtoModuleIndex::discoverCalleeModulePaths(FunctionSummary *S, - unsigned LookaheadLevels) { - // Populate initial worklist - std::vector Worklist; - addToWorklist(Worklist, S->calls()); - unsigned Distance = 0; - - while (++Distance < LookaheadLevels) { - // Process current worklist and populate a new one. - std::vector NextWorklist; - for (FunctionSummary *F : Worklist) { - updatePathRank(F->modulePath(), Distance); - addToWorklist(NextWorklist, F->calls()); - } - Worklist = std::move(NextWorklist); - } - - // Process the last worklist without filling a new one - for (FunctionSummary *F : Worklist) { - updatePathRank(F->modulePath(), Distance); - } - - // Reset counts for known paths (includes both, scheduled and parsed modules). - std::lock_guard Lock(ScheduledModulesLock); - for (const auto &KV : ScheduledModules) { - PathRank[KV.first()].Count = 0; - } -} - -void ThinLtoModuleIndex::addToWorklist( - std::vector &List, - ArrayRef Calls) { - for (const auto &Edge : Calls) { - const auto &SummaryList = Edge.first.getSummaryList(); - if (!SummaryList.empty()) { - GlobalValueSummary *S = SummaryList.front().get()->getBaseObject(); - assert(isa(S) && "Callees must be functions"); - List.push_back(cast(S)); - } - } -} - -// PathRank is global and continuous. -void ThinLtoModuleIndex::updatePathRank(StringRef Path, unsigned Distance) { - auto &Entry = PathRank[Path]; - Entry.Count += 1; - Entry.MinDist = std::min(Entry.MinDist, Distance); - assert(Entry.MinDist > 0 && "We want it as a divisor"); -} - -// TODO: The size of a ThreadPool's task queue is not accessible. It would -// be great to know in order to estimate how many modules we schedule. The -// more we schedule, the less precise is the ranking. The less we schedule, -// the higher the risk for downtime. -std::vector ThinLtoModuleIndex::selectNextPaths() { - struct ScorePath { - float Score; - unsigned MinDist; - StringRef Path; - }; - - std::vector Candidates; - Candidates.reserve(PathRank.size()); - for (const auto &KV : PathRank) { - float Score = static_cast(KV.second.Count) / KV.second.MinDist; - if (Score > .0f) { - Candidates.push_back({Score, KV.second.MinDist, KV.first()}); - } - } - - // Sort candidates by descending score. - std::sort(Candidates.begin(), Candidates.end(), - [](const ScorePath &LHS, const ScorePath &RHS) { - return LHS.Score > RHS.Score; - }); - - // Sort highest score candidates by ascending minimal distance. - size_t Selected = - std::min(std::max(NumParseModuleThreads, Candidates.size() / 2), - Candidates.size()); - std::sort(Candidates.begin(), Candidates.begin() + Selected, - [](const ScorePath &LHS, const ScorePath &RHS) { - return LHS.MinDist < RHS.MinDist; - }); - - std::vector Paths; - Paths.reserve(Selected); - for (unsigned i = 0; i < Selected; i++) { - Paths.push_back(Candidates[i].Path.str()); - } - - LLVM_DEBUG(dbgs() << "ModuleIndex: select " << Paths.size() << " out of " - << Candidates.size() << " discovered paths\n"); - - return Paths; -} - -unsigned ThinLtoModuleIndex::getNumDiscoveredModules() const { - // TODO: It would probably be more efficient to track the number of - // unscheduled modules. - unsigned NonNullItems = 0; - for (const auto &KV : PathRank) - if (KV.second.Count > 0) - ++NonNullItems; - return NonNullItems; -} - -} // namespace orc -} // namespace llvm diff --git a/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h b/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h deleted file mode 100644 index 29a24a0c5e147..0000000000000 --- a/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef LLVM_EXAMPLES_THINLTOJIT_THINLTOJITMODULEINDEX_H -#define LLVM_EXAMPLES_THINLTOJIT_THINLTOJITMODULEINDEX_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ExecutionEngine/Orc/Core.h" -#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ThreadPool.h" - -#include -#include -#include -#include -#include - -namespace llvm { -namespace orc { - -class SymbolStringPtr; - -class ThinLtoModuleIndex { - static constexpr bool HaveGVs = false; - -public: - ThinLtoModuleIndex(ExecutionSession &ES, unsigned ParseModuleThreads) - : ES(ES), CombinedSummaryIndex(HaveGVs), - ParseModuleWorkers(llvm::hardware_concurrency(ParseModuleThreads)), - NumParseModuleThreads(ParseModuleThreads) {} - - Error add(StringRef InputPath); - GlobalValueSummary *getSummary(GlobalValue::GUID Function) const; - std::vector getAllModulePaths() const; - Optional getModulePathForSymbol(StringRef Name) const; - - template void scheduleModuleParsing(const RangeT &Paths); - ThreadSafeModule takeModule(StringRef Path); - - // Blocking module parsing, returns a Null-module on error. - // Only used for the main module. - ThreadSafeModule parseModuleFromFile(StringRef Path); - - std::vector selectNextPaths(); - unsigned getNumDiscoveredModules() const; - void discoverCalleeModulePaths(FunctionSummary *S, unsigned LookaheadLevels); - - VModuleKey getModuleId(StringRef Path) const { - return CombinedSummaryIndex.getModuleId(Path); - } - -private: - ExecutionSession &ES; - ModuleSummaryIndex CombinedSummaryIndex; - uint64_t NextModuleId{0}; - - struct PathRankEntry { - uint32_t Count{0}; - uint32_t MinDist{100}; - }; - StringMap PathRank; - - ThreadPool ParseModuleWorkers; - unsigned NumParseModuleThreads; - - std::mutex ScheduledModulesLock; - StringMap> ScheduledModules; - - std::mutex ParsedModulesLock; - StringMap ParsedModules; - - void updatePathRank(StringRef Path, unsigned Distance); - void addToWorklist(std::vector &List, - ArrayRef Calls); - - std::vector selectAllPaths(); - std::vector selectHotPaths(unsigned Count); - - void scheduleModuleParsingPrelocked(StringRef Path); - Expected doParseModule(StringRef Path); -}; - -template -inline void ThinLtoModuleIndex::scheduleModuleParsing(const RangeT &Paths) { - std::lock_guard Lock(ScheduledModulesLock); - for (const auto &Path : Paths) { - scheduleModuleParsingPrelocked(Path); - } -} - -} // namespace orc -} // namespace llvm - -#endif diff --git a/llvm/examples/ThinLtoJIT/bench b/llvm/examples/ThinLtoJIT/bench deleted file mode 100755 index 796697eb5948e..0000000000000 --- a/llvm/examples/ThinLtoJIT/bench +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -#set -x - -if [ $# -gt 2 ]; then - TOOLS_DIR="$1" - SOURCE_DIR="$2" - MAIN_SOURCE_FILE="$3" -else - echo "Usage: bench
[]" - exit 1 -fi - -if [ $# -gt 3 ]; then - SYS_ROOT="$4" -else - SYS_ROOT="/" -fi - -function check_tool () -{ - if [ -e "${TOOLS_DIR}/$1" ]; then - echo "Found: $1" - else - echo "!!! Cannot find required tool, please provide it in the LLVM binaries folder: $1" - fi -} - -check_tool lli -check_tool SpeculativeJIT -check_tool ThinLtoJIT - -SKIP_BITCODE_GEN=0 -if [[ -e bc-default || -e bc-thinlto || -e ll-default || -e ll-thinlto ]]; then - echo "Skipping bitcode generation: output directories existing" - echo "Please clean up manually: rm -R bc-default bc-thinlto ll-default ll-thinlto" - SKIP_BITCODE_GEN=1 -else - check_tool clang - check_tool llvm-dis - check_tool llvm-lto - mkdir bc-default - mkdir bc-thinlto - mkdir ll-default - mkdir ll-thinlto -fi - -ROOT_DIR=$(pwd) -ALL_BITCODE_FILES="" - -MAIN_FILE_BASENAME=$(basename "${MAIN_SOURCE_FILE%.c*}") -LLI_EXTRA_MODULES="" - -for f in ${SOURCE_DIR}/*.c* ; do - BASE_NAME=$(basename "${f%.c*}") - - if [ ${SKIP_BITCODE_GEN} -eq 0 ]; then - echo "Compile: $f -> ${BASE_NAME}.bc" - - ${TOOLS_DIR}/clang -c -I ${SOURCE_DIR} ${CFLAGS} -isysroot ${SYS_ROOT} -emit-llvm \ - -o "bc-default/${BASE_NAME}.bc" "$f" - ${TOOLS_DIR}/clang -c -I ${SOURCE_DIR} ${CFLAGS} -isysroot ${SYS_ROOT} -flto=thin \ - -o "bc-thinlto/${BASE_NAME}.bc" "$f" - - echo "Disassemble ${BASE_NAME}.bc -> ${BASE_NAME}.ll" - ${TOOLS_DIR}/llvm-dis bc-default/${BASE_NAME}.bc -o ll-default/${BASE_NAME}.ll - ${TOOLS_DIR}/llvm-dis bc-thinlto/${BASE_NAME}.bc -o ll-thinlto/${BASE_NAME}.ll - fi - - ALL_BITCODE_FILES="${ALL_BITCODE_FILES} ${BASE_NAME}.bc" - if [ "${BASE_NAME}" != "${MAIN_FILE_BASENAME}" ]; then - LLI_EXTRA_MODULES="${LLI_EXTRA_MODULES} -extra-module=${BASE_NAME}.bc" - fi -done - -if [ ${SKIP_BITCODE_GEN} -eq 0 ]; then - echo "Link global index file: index.thinlto.bc" - cd ${ROOT_DIR}/bc-thinlto - ${TOOLS_DIR}/llvm-lto --thinlto -o ${ROOT_DIR}/bc-thinlto/index ${ALL_BITCODE_FILES} - - echo "Disassemble global index file: index.thinlto.ll" - cd ${ROOT_DIR}/ll-thinlto - ${TOOLS_DIR}/llvm-dis -o index.thinlto.ll ${ROOT_DIR}/bc-thinlto/index.thinlto.bc -fi - -set -x -cd ${ROOT_DIR}/bc-default -time (${TOOLS_DIR}/clang -o ${MAIN_FILE_BASENAME} -O0 ${LDFLAGS} ${ALL_BITCODE_FILES} && ./${MAIN_FILE_BASENAME} ${EXEC_ARGS} 1>/dev/null) -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=mcjit "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-mcjit "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -compile-threads=8 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy -compile-threads=8 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy -compile-threads=8 -O1 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy -compile-threads=8 -O0 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null -time ${TOOLS_DIR}/SpeculativeJIT -num-threads=8 ${ALL_BITCODE_FILES} --args ${EXEC_ARGS} 1>/dev/null - -cd ${ROOT_DIR}/bc-thinlto -#time (${TOOLS_DIR}/clang -flto=thin -o test ${ALL_BITCODE_FILES} && ./test ${EXEC_ARGS} 1>/dev/null) -time ${TOOLS_DIR}/ThinLtoJIT index.thinlto.bc --args ${EXEC_ARGS} 1>/dev/null diff --git a/llvm/examples/ThinLtoJIT/main.cpp b/llvm/examples/ThinLtoJIT/main.cpp deleted file mode 100644 index 5a338e9474446..0000000000000 --- a/llvm/examples/ThinLtoJIT/main.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/TargetSelect.h" - -#include "ThinLtoJIT.h" - -#include -#include - -using namespace llvm; - -static cl::list - InputFiles(cl::Positional, cl::OneOrMore, - cl::desc("")); - -static cl::list InputArgs("args", cl::Positional, - cl::desc("..."), - cl::ZeroOrMore, cl::PositionalEatsArgs); - -static cl::opt CompileThreads("compile-threads", cl::Optional, - cl::desc("Number of compile threads"), - cl::init(4)); - -static cl::opt LoadThreads("load-threads", cl::Optional, - cl::desc("Number of module load threads"), - cl::init(8)); - -static cl::opt - LookaheadLevels("lookahead", cl::Optional, - cl::desc("Calls to look ahead of execution"), cl::init(4)); - -static cl::opt DiscoveryFlagsBucketSize( - "discovery-flag-bucket-size", cl::Optional, - cl::desc("Flags per bucket (rounds up to memory pages)"), cl::init(4096)); - -static cl::opt - MemFence("mem-fence", - cl::desc("Control memory fences for cache synchronization"), - cl::init(orc::ThinLtoJIT::NeverFence), - cl::values(clEnumValN(orc::ThinLtoJIT::NeverFence, "never", - "No use of memory fences"), - clEnumValN(orc::ThinLtoJIT::FenceStaticCode, "static", - "Use of memory fences in static code only"), - clEnumValN(orc::ThinLtoJIT::FenceJITedCode, "jited", - "Install memory fences in JITed code only"), - clEnumValN(orc::ThinLtoJIT::AlwaysFence, "always", - "Always use of memory fences"))); - -static cl::opt - AllowNudge("allow-nudge", - cl::desc("Allow the symbol generator to nudge symbols into " - "discovery even though they haven't been reached"), - cl::init(false)); - -static cl::opt PrintStats("print-stats", - cl::desc("Print module stats on shutdown"), - cl::init(false)); - -int main(int argc, char *argv[]) { - InitLLVM X(argc, argv); - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - cl::ParseCommandLineOptions(argc, argv, "ThinLtoJIT"); - - Error Err = Error::success(); - auto atLeastOne = [](unsigned N) { return std::max(1u, N); }; - - orc::ThinLtoJIT Jit(InputFiles, "main", atLeastOne(LookaheadLevels), - atLeastOne(CompileThreads), atLeastOne(LoadThreads), - DiscoveryFlagsBucketSize, MemFence, AllowNudge, - PrintStats, Err); - if (Err) { - logAllUnhandledErrors(std::move(Err), errs(), "[ThinLtoJIT] "); - exit(1); - } - - ExitOnError ExitOnErr; - ExitOnErr.setBanner("[ThinLtoJIT] "); - - return ExitOnErr(Jit.main(InputArgs)); -} From 306571cc4642c4e443c8cb0593a2b595ef96580e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 1 Oct 2020 10:25:06 +0200 Subject: [PATCH 249/544] [ORC][examples] Temporarily remove LLJITWithChildProcess until ORC TPC lands This solves a phase ordering problem: OrcV2 remote process support depends on OrcV2 removable code, OrcV2 removable code depends on OrcV1 removal, OrcV1 removal depends on LLJITWithChildProcess migration, and LLJITWithChildProcess migration depends on OrcV2 TargetProcessControl support. --- llvm/examples/OrcV2Examples/CMakeLists.txt | 4 - .../LLJITWithChildProcess/CMakeLists.txt | 14 -- .../LLJITWithChildProcess.cpp | 128 ------------------ .../LLJITWithChildProcess/RemoteJITUtils.h | 121 ----------------- 4 files changed, 267 deletions(-) delete mode 100644 llvm/examples/OrcV2Examples/LLJITWithChildProcess/CMakeLists.txt delete mode 100644 llvm/examples/OrcV2Examples/LLJITWithChildProcess/LLJITWithChildProcess.cpp delete mode 100644 llvm/examples/OrcV2Examples/LLJITWithChildProcess/RemoteJITUtils.h diff --git a/llvm/examples/OrcV2Examples/CMakeLists.txt b/llvm/examples/OrcV2Examples/CMakeLists.txt index 1d87a84fee97b..2b7f9c501beff 100644 --- a/llvm/examples/OrcV2Examples/CMakeLists.txt +++ b/llvm/examples/OrcV2Examples/CMakeLists.txt @@ -11,7 +11,3 @@ add_subdirectory(LLJITWithThinLTOSummaries) add_subdirectory(OrcV2CBindingsAddObjectFile) add_subdirectory(OrcV2CBindingsBasicUsage) add_subdirectory(OrcV2CBindingsReflectProcessSymbols) - -if(CMAKE_HOST_UNIX) - add_subdirectory(LLJITWithChildProcess) -endif() diff --git a/llvm/examples/OrcV2Examples/LLJITWithChildProcess/CMakeLists.txt b/llvm/examples/OrcV2Examples/LLJITWithChildProcess/CMakeLists.txt deleted file mode 100644 index 54acae427b214..0000000000000 --- a/llvm/examples/OrcV2Examples/LLJITWithChildProcess/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Core - ExecutionEngine - IRReader - JITLink - OrcError - OrcJIT - Support - nativecodegen - ) - -add_llvm_example(LLJITInChildProcess - LLJITWithChildProcess.cpp - ) diff --git a/llvm/examples/OrcV2Examples/LLJITWithChildProcess/LLJITWithChildProcess.cpp b/llvm/examples/OrcV2Examples/LLJITWithChildProcess/LLJITWithChildProcess.cpp deleted file mode 100644 index 7fb019ddec2b0..0000000000000 --- a/llvm/examples/OrcV2Examples/LLJITWithChildProcess/LLJITWithChildProcess.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//===--- LLJITWithChildProcess.cpp - LLJIT targeting a child process ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// In this example we will execute JITed code in a child process: -// -// 1. Launch a remote process. -// 2. Create a JITLink-compatible remote memory manager. -// 3. Use LLJITBuilder to create a (greedy) LLJIT instance. -// 4. Add the Add1Example module and execute add1(). -// 5. Terminate the remote target session. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" -#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" -#include "llvm/ExecutionEngine/Orc/LLJIT.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/raw_ostream.h" - -#include "../ExampleModules.h" -#include "RemoteJITUtils.h" - -#include -#include - -#define DEBUG_TYPE "orc" - -using namespace llvm; -using namespace llvm::orc; - -// Executable running in the child process for remote execution. It communicates -// via stdin/stdout pipes. -cl::opt - ChildExecPath("remote-process", cl::Required, - cl::desc("Specify the filename of the process to launch for " - "remote JITing."), - cl::value_desc("filename")); - -int main(int argc, char *argv[]) { - InitLLVM X(argc, argv); - - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - - cl::ParseCommandLineOptions(argc, argv, "LLJITWithChildProcess"); - - ExitOnError ExitOnErr; - ExitOnErr.setBanner(std::string(argv[0]) + ": "); - - if (!sys::fs::can_execute(ChildExecPath)) { - WithColor::error(errs(), argv[0]) - << "Child executable invalid: '" << ChildExecPath << "'\n"; - return -1; - } - - ExecutionSession ES; - ES.setErrorReporter([&](Error Err) { ExitOnErr(std::move(Err)); }); - - // Launch the remote process and get a channel to it. - pid_t ChildPID; - std::unique_ptr Ch = launchRemote(ChildExecPath, ChildPID); - if (!Ch) { - WithColor::error(errs(), argv[0]) << "Failed to launch remote JIT.\n"; - exit(1); - } - - LLVM_DEBUG({ - dbgs() - << "Launched executable in subprocess " << ChildPID << ":\n" - << ChildExecPath << "\n\n" - << "You may want to attach a debugger now. Press enter to continue.\n"; - fflush(stdin); - getchar(); - }); - - std::unique_ptr Client = - ExitOnErr(remote::OrcRemoteTargetClient::Create(*Ch, ES)); - - // Create a JITLink-compatible remote memory manager. - using MemManager = remote::OrcRemoteTargetClient::RemoteJITLinkMemoryManager; - std::unique_ptr RemoteMM = - ExitOnErr(Client->createRemoteJITLinkMemoryManager()); - - // Our remote target is running on the host system. - auto JTMB = ExitOnErr(JITTargetMachineBuilder::detectHost()); - JTMB.setCodeModel(CodeModel::Small); - - // Create an LLJIT instance with a JITLink ObjectLinkingLayer. - auto J = ExitOnErr( - LLJITBuilder() - .setJITTargetMachineBuilder(std::move(JTMB)) - .setObjectLinkingLayerCreator( - [&](ExecutionSession &ES, - const Triple &TT) -> std::unique_ptr { - return std::make_unique(ES, *RemoteMM); - }) - .create()); - - auto M = ExitOnErr(parseExampleModule(Add1Example, "add1")); - - ExitOnErr(J->addIRModule(std::move(M))); - - // Look up the JIT'd function. - auto Add1Sym = ExitOnErr(J->lookup("add1")); - - // Run in child target. - Expected Result = Client->callIntInt(Add1Sym.getAddress(), 42); - if (Result) - outs() << "add1(42) = " << *Result << "\n"; - else - ES.reportError(Result.takeError()); - - // Signal the remote target that we're done JITing. - ExitOnErr(Client->terminateSession()); - LLVM_DEBUG(dbgs() << "Subprocess terminated\n"); - - return 0; -} diff --git a/llvm/examples/OrcV2Examples/LLJITWithChildProcess/RemoteJITUtils.h b/llvm/examples/OrcV2Examples/LLJITWithChildProcess/RemoteJITUtils.h deleted file mode 100644 index 9e3f1d417b816..0000000000000 --- a/llvm/examples/OrcV2Examples/LLJITWithChildProcess/RemoteJITUtils.h +++ /dev/null @@ -1,121 +0,0 @@ -//===-- RemoteJITUtils.h - Utilities for remote-JITing ----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utilities for remote-JITing -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXAMPLES_ORCV2EXAMPLES_LLJITWITHCHILDPROCESS_REMOTEJITUTILS_H -#define LLVM_EXAMPLES_ORCV2EXAMPLES_LLJITWITHCHILDPROCESS_REMOTEJITUTILS_H - -#include "llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h" -#include - -#if !defined(_MSC_VER) && !defined(__MINGW32__) -#include -#else -#include -#endif - -/// RPC channel that reads from and writes from file descriptors. -class FDRawChannel final : public llvm::orc::rpc::RawByteChannel { -public: - FDRawChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {} - - llvm::Error readBytes(char *Dst, unsigned Size) override { - assert(Dst && "Attempt to read into null."); - ssize_t Completed = 0; - while (Completed < static_cast(Size)) { - ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed); - if (Read <= 0) { - auto ErrNo = errno; - if (ErrNo == EAGAIN || ErrNo == EINTR) - continue; - else - return llvm::errorCodeToError( - std::error_code(errno, std::generic_category())); - } - Completed += Read; - } - return llvm::Error::success(); - } - - llvm::Error appendBytes(const char *Src, unsigned Size) override { - assert(Src && "Attempt to append from null."); - ssize_t Completed = 0; - while (Completed < static_cast(Size)) { - ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed); - if (Written < 0) { - auto ErrNo = errno; - if (ErrNo == EAGAIN || ErrNo == EINTR) - continue; - else - return llvm::errorCodeToError( - std::error_code(errno, std::generic_category())); - } - Completed += Written; - } - return llvm::Error::success(); - } - - llvm::Error send() override { return llvm::Error::success(); } - -private: - int InFD, OutFD; -}; - -// Launch child process and return a channel to it. -std::unique_ptr launchRemote(std::string ExecPath, - pid_t &ChildPID) { - // Create two pipes. - int PipeFD[2][2]; - if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0) - perror("Error creating pipe: "); - - ChildPID = fork(); - - if (ChildPID == 0) { - // In the child... - - // Close the parent ends of the pipes - close(PipeFD[0][1]); - close(PipeFD[1][0]); - - // Execute the child process. - std::unique_ptr ChildPath, ChildIn, ChildOut; - { - ChildPath.reset(new char[ExecPath.size() + 1]); - std::copy(ExecPath.begin(), ExecPath.end(), &ChildPath[0]); - ChildPath[ExecPath.size()] = '\0'; - std::string ChildInStr = llvm::utostr(PipeFD[0][0]); - ChildIn.reset(new char[ChildInStr.size() + 1]); - std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]); - ChildIn[ChildInStr.size()] = '\0'; - std::string ChildOutStr = llvm::utostr(PipeFD[1][1]); - ChildOut.reset(new char[ChildOutStr.size() + 1]); - std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]); - ChildOut[ChildOutStr.size()] = '\0'; - } - - char *const args[] = {&ChildPath[0], &ChildIn[0], &ChildOut[0], nullptr}; - int rc = execv(ExecPath.c_str(), args); - if (rc != 0) - perror("Error executing child process: "); - llvm_unreachable("Error executing child process"); - } - // else we're the parent... - - // Close the child ends of the pipes - close(PipeFD[0][0]); - close(PipeFD[1][1]); - - // Return an RPC channel connected to our end of the pipes. - return std::make_unique(PipeFD[1][0], PipeFD[0][1]); -} - -#endif From 7b90516d479ca6aadf4e261747d62c854d6f5463 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 10:42:08 +0100 Subject: [PATCH 250/544] [ARM][LowOverheadLoops] Start insertion point If possible, try not to move the start position earlier than it already is. Differential Revision: https://reviews.llvm.org/D88542 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 31 ++++++++++--------- .../Thumb2/LowOverheadLoops/it-block-mov.mir | 2 +- .../loop-dec-copy-prev-iteration.mir | 1 - .../LowOverheadLoops/mov-after-dlstp.mir | 2 +- .../Thumb2/LowOverheadLoops/mov-operand.ll | 2 +- .../CodeGen/Thumb2/mve-float32regloops.ll | 2 +- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index c86cf32357322..fe28470c99ec4 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1015,9 +1015,11 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { // Find a suitable position to insert the loop start instruction. It needs to // be able to safely define LR. auto FindStartInsertionPoint = [](MachineInstr *Start, + MachineInstr *Dec, MachineBasicBlock::iterator &InsertPt, MachineBasicBlock *&InsertBB, - ReachingDefAnalysis &RDA) { + ReachingDefAnalysis &RDA, + InstSet &ToRemove) { // We can define LR because LR already contains the same value. if (Start->getOperand(0).getReg() == ARM::LR) { InsertPt = MachineBasicBlock::iterator(Start); @@ -1033,23 +1035,29 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { MI->getOperand(2).getImm() == ARMCC::AL; }; - MachineBasicBlock *MBB = Start->getParent(); - // Find an insertion point: // - Is there a (mov lr, Count) before Start? If so, and nothing else - // writes to Count before Start, we can insert at that mov. + // writes to Count before Start, we can insert at start. if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) { if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) { - InsertPt = MachineBasicBlock::iterator(LRDef); - InsertBB = LRDef->getParent(); + SmallPtrSet Ignore = { Dec }; + if (!TryRemove(LRDef, RDA, ToRemove, Ignore)) + return false; + InsertPt = MachineBasicBlock::iterator(Start); + InsertBB = Start->getParent(); return true; } } // - Is there a (mov lr, Count) after Start? If so, and nothing else writes - // to Count after Start, we can insert at that mov. + // to Count after Start, we can insert at that mov (which will now be + // dead). + MachineBasicBlock *MBB = Start->getParent(); if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) { if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) { + SmallPtrSet Ignore = { Start, Dec }; + if (!TryRemove(LRDef, RDA, ToRemove, Ignore)) + return false; InsertPt = MachineBasicBlock::iterator(LRDef); InsertBB = LRDef->getParent(); return true; @@ -1066,7 +1074,8 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { return true; }; - if (!FindStartInsertionPoint(Start, StartInsertPt, StartInsertBB, RDA)) { + if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, + ToRemove)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; return; @@ -1411,9 +1420,6 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { // Collect and remove the users of iteration count. SmallPtrSet Killed = { LoLoop.Start, LoLoop.Dec, LoLoop.End }; - if (LoLoop.StartInsertPt != LoLoop.StartInsertBB->end()) - Killed.insert(&*LoLoop.StartInsertPt); - if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); } @@ -1439,9 +1445,6 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { if (!IsDo) MIB.add(Start->getOperand(1)); - // If we're inserting at a mov lr, then remove it as it's redundant. - if (InsertPt != MBB->end()) - LoLoop.ToRemove.insert(&*InsertPt); LoLoop.ToRemove.insert(Start); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); return &*MIB; diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir index f63d3fde7dee7..2ee932acb840a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir @@ -78,12 +78,12 @@ body: | ; CHECK: successors: %bb.5(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2DLS killed $r4 ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0 ; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: bb.5: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir index f59a322e14337..f2cb5547c7dd6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir @@ -273,7 +273,6 @@ body: | ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep617) ; CHECK: renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep418) - ; CHECK: dead $r12 = tMOVr $lr, 14 /* CC::al */, $noreg ; CHECK: renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep219) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index a4b094020fcbf..b71c2dd7aaa01 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -175,8 +175,8 @@ body: | ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0 + ; CHECK: $lr = t2DLS killed $r4 ; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index b97204c69f321..0e182ece4a75c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -26,10 +26,10 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 -; CHECK-NEXT: dls lr, r4 ; CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 +; CHECK-NEXT: dls lr, r4 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 165bf72c7187d..a43f564951e93 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1435,8 +1435,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(%struct.arm_biqu ; CHECK-NEXT: vdup.32 q1, r6 ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: vmov.f32 s6, s12 -; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: vmov.f32 s10, s14 +; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: vmov.f32 s7, s12 ; CHECK-NEXT: vmov.f32 s11, s14 ; CHECK-NEXT: .LBB17_3: @ Parent Loop BB17_2 Depth=1 From 6ec5f324973dfbe7b4a489593dde5073ff63ff64 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 30 Sep 2020 11:10:49 +0100 Subject: [PATCH 251/544] [ARM][LowOverheadLoops] Iteration count liveness Before deciding to insert a [W|D]LSTP, check that defining LR with the element count won't affect any other instructions that should be taking the iteration count. Differential Revision: https://reviews.llvm.org/D88549 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 17 ++++++++++ .../LowOverheadLoops/it-block-chain-store.mir | 32 ++++++++++++------- .../LowOverheadLoops/mov-after-dlstp.mir | 15 +++++---- .../Thumb2/LowOverheadLoops/mov-operand.ll | 11 ++++--- 4 files changed, 53 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index fe28470c99ec4..cd9c38752ad23 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -611,6 +611,23 @@ bool LowOverheadLoop::ValidateTailPredicate() { return false; } + // Check that creating a [W|D]LSTP, which will define LR with an element + // count instead of iteration count, won't affect any other instructions + // than the LoopStart and LoopDec. + // TODO: We should try to insert the [W|D]LSTP after any of the other uses. + if (StartInsertPt == Start && Start->getOperand(0).getReg() == ARM::LR) { + if (auto *IterCount = RDA.getMIOperand(Start, 0)) { + SmallPtrSet Uses; + RDA.getGlobalUses(IterCount, ARM::LR, Uses); + for (auto *Use : Uses) { + if (Use != Start && Use != Dec) { + LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use); + return false; + } + } + } + } + // For tail predication, we need to provide the number of elements, instead // of the iteration count, to the loop start instruction. The number of // elements is provided to the vctp instruction, so we need to check that diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir index c5713c8224b5e..c2de31ddef1f2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir @@ -142,18 +142,22 @@ body: | ; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 + ; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.1.do.body: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: liveins: $r0, $r1, $r2, $r12 ; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4) + ; CHECK: renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4) ; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4) - ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4) + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 ; CHECK: bb.2.do.end: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: @@ -242,19 +246,23 @@ body: | ; CHECK: renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - ; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.1.do.body: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: liveins: $r0, $r1, $r2, $r12 ; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4) + ; CHECK: renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4) ; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4) - ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4) + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 ; CHECK: bb.2.do.end: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index b71c2dd7aaa01..94e3e26c819d6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -160,17 +160,20 @@ body: | ; CHECK: renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg - ; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 + ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg ; CHECK: bb.1.do.body.i: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12 - ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4) - ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0 + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 ; CHECK: bb.2.arm_mean_f32_mve.exit: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 0e182ece4a75c..1404075dce901 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -17,13 +17,16 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: add.w lr, r12, r3, lsr #2 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r4, lr ; CHECK-NEXT: .LBB0_1: @ %do.body.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r12], #16 -; CHECK-NEXT: vadd.f32 q0, q0, q1 -; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: vctp.32 r3 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q1, [r12], #16 +; CHECK-NEXT: vaddt.f32 q0, q0, q1 +; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 ; CHECK-NEXT: vadd.f32 s0, s3, s3 From 8931c3d682763e6129f8d05ebe4e3b8dcc3e08e2 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 25 Sep 2020 13:37:03 +0100 Subject: [PATCH 252/544] [NFC] Iterate across an explicit list of scalable MVTs when driving setOperationAction. Iterating across all of integer_scalable_vector_valuetypes seems wasteful when there's only a handful we care about. Also removes some rouge whitespace. Differential Revision: https://reviews.llvm.org/D88552 --- .../Target/AArch64/AArch64ISelLowering.cpp | 135 ++++++++++-------- 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 578bf1560d019..0c8da4e20d7d0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -137,6 +137,23 @@ static inline EVT getPackedSVEVectorVT(EVT VT) { } } +static inline MVT getPromotedVTForPredicate(MVT VT) { + assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) && + "Expected scalable predicate vector type!"); + switch (VT.getVectorMinNumElements()) { + default: + llvm_unreachable("unexpected element count for vector"); + case 2: + return MVT::nxv2i64; + case 4: + return MVT::nxv4i32; + case 8: + return MVT::nxv8i16; + case 16: + return MVT::nxv16i8; + } +} + /// Returns true if VT's elements occupy the lowest bit positions of its /// associated register class without any intervening space. /// @@ -973,76 +990,74 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a // splat of 0 or undef) once vector selects supported in SVE codegen. See // D68877 for more details. - for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { - if (isTypeLegal(VT)) { - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::UINT_TO_FP, VT, Custom); - setOperationAction(ISD::SINT_TO_FP, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::SDIV, VT, Custom); - setOperationAction(ISD::UDIV, VT, Custom); - setOperationAction(ISD::SMIN, VT, Custom); - setOperationAction(ISD::UMIN, VT, Custom); - setOperationAction(ISD::SMAX, VT, Custom); - setOperationAction(ISD::UMAX, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - if (VT.getScalarType() == MVT::i1) { - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); - } - } - } + for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::UDIV, VT, Custom); + setOperationAction(ISD::SMIN, VT, Custom); + setOperationAction(ISD::UMIN, VT, Custom); + setOperationAction(ISD::SMAX, VT, Custom); + setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + } + + // Illegal unpacked integer vector types. for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); } - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); + for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); - for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { - if (isTypeLegal(VT)) { - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::FADD, VT, Custom); - setOperationAction(ISD::FDIV, VT, Custom); - setOperationAction(ISD::FMA, VT, Custom); - setOperationAction(ISD::FMUL, VT, Custom); - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FSUB, VT, Custom); - setOperationAction(ISD::FCEIL, VT, Custom); - setOperationAction(ISD::FFLOOR, VT, Custom); - setOperationAction(ISD::FNEARBYINT, VT, Custom); - setOperationAction(ISD::FRINT, VT, Custom); - setOperationAction(ISD::FROUND, VT, Custom); - setOperationAction(ISD::FROUNDEVEN, VT, Custom); - setOperationAction(ISD::FTRUNC, VT, Custom); - setOperationAction(ISD::FSQRT, VT, Custom); + // There are no legal MVT::nxv16f## based types. + if (VT != MVT::nxv16i1) { + setOperationAction(ISD::SINT_TO_FP, VT, Promote); + AddPromotedToType(ISD::SINT_TO_FP, VT, getPromotedVTForPredicate(VT)); + setOperationAction(ISD::UINT_TO_FP, VT, Promote); + AddPromotedToType(ISD::UINT_TO_FP, VT, getPromotedVTForPredicate(VT)); } } - setOperationAction(ISD::SINT_TO_FP, MVT::nxv2i1, Promote); - AddPromotedToType(ISD::SINT_TO_FP, MVT::nxv2i1, MVT::nxv2i64); - setOperationAction(ISD::SINT_TO_FP, MVT::nxv4i1, Promote); - AddPromotedToType(ISD::SINT_TO_FP, MVT::nxv4i1, MVT::nxv4i32); - setOperationAction(ISD::SINT_TO_FP, MVT::nxv8i1, Promote); - AddPromotedToType(ISD::SINT_TO_FP, MVT::nxv8i1, MVT::nxv8i16); + for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, + MVT::nxv4f32, MVT::nxv2f64}) { + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::FADD, VT, Custom); + setOperationAction(ISD::FDIV, VT, Custom); + setOperationAction(ISD::FMA, VT, Custom); + setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FSUB, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::FROUNDEVEN, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FSQRT, VT, Custom); + } + + setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::nxv2i1, Promote); - AddPromotedToType(ISD::UINT_TO_FP, MVT::nxv2i1, MVT::nxv2i64); - setOperationAction(ISD::UINT_TO_FP, MVT::nxv4i1, Promote); - AddPromotedToType(ISD::UINT_TO_FP, MVT::nxv4i1, MVT::nxv4i32); - setOperationAction(ISD::UINT_TO_FP, MVT::nxv8i1, Promote); - AddPromotedToType(ISD::UINT_TO_FP, MVT::nxv8i1, MVT::nxv8i16); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); // NOTE: Currently this has to happen after computeRegisterProperties rather // than the preferred option of combining it with the addRegisterClass call. @@ -3456,7 +3471,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_frintm: return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_frinti: return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); From 75db7cf78ad5138e767b8d04c9a758009191ee0c Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Wed, 30 Sep 2020 10:55:51 +0100 Subject: [PATCH 253/544] [SVE][CodeGen] Legalisation of integer -> floating point conversions Splitting the operand of a scalable [S|U]INT_TO_FP results in a concat_vectors operation where the operands are unpacked FP scalable vectors (e.g. nxv2f32). This patch adds custom lowering of concat_vectors which checks that the number of operands is 2, and isel patterns to match concat_vectors of scalable FP types with uzp1. Reviewed By: efriedma, paulwalker-arm Differential Revision: https://reviews.llvm.org/D88033 --- .../Target/AArch64/AArch64ISelLowering.cpp | 18 ++- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 + llvm/test/CodeGen/AArch64/sve-split-fcvt.ll | 141 ++++++++++++++++++ 3 files changed, 165 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c8da4e20d7d0..d8072dbb856e4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -990,7 +990,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a // splat of 0 or undef) once vector selects supported in SVE codegen. See // D68877 for more details. - for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); @@ -1018,7 +1017,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { - setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); @@ -1035,6 +1034,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); @@ -3835,6 +3835,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerRETURNADDR(Op, DAG); case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: @@ -9150,6 +9152,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return SDValue(); } +SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getValueType().isScalableVector() && + isTypeLegal(Op.getValueType()) && + "Expected legal scalable vector type!"); + + if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2) + return Op; + + return SDValue(); +} + SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index a356f8390d2b3..49e8ac86e0df7 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1195,6 +1195,14 @@ multiclass sve_prefetch; + // Concatenate two floating point vectors. + def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)), + (UZP1_ZZZ_H $v1, $v2)>; + def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll index fbd9beceaa1f0..41b3e0ee13e16 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -95,3 +95,144 @@ define @fcvtzu_d_nxv4f32( %a) { %res = fptoui %a to ret %res } + +; SINT_TO_FP + +; Split operand +define @scvtf_s_nxv4i64( %a) { +; CHECK-LABEL: scvtf_s_nxv4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z1.s, p0/m, z1.d +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_h_nxv8i64( %a) { +; CHECK-LABEL: scvtf_h_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z3.h, p0/m, z3.d +; CHECK-NEXT: scvtf z2.h, p0/m, z2.d +; CHECK-NEXT: scvtf z1.h, p0/m, z1.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +; Split result +define @scvtf_s_nxv16i8( %a) { +; CHECK-LABEL: scvtf_s_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpklo z1.h, z0.b +; CHECK-NEXT: sunpkhi z0.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpklo z2.s, z1.h +; CHECK-NEXT: sunpkhi z1.s, z1.h +; CHECK-NEXT: sunpklo z3.s, z0.h +; CHECK-NEXT: sunpkhi z4.s, z0.h +; CHECK-NEXT: scvtf z0.s, p0/m, z2.s +; CHECK-NEXT: scvtf z1.s, p0/m, z1.s +; CHECK-NEXT: scvtf z2.s, p0/m, z3.s +; CHECK-NEXT: scvtf z3.s, p0/m, z4.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_d_nxv4i32( %a) { +; CHECK-LABEL: scvtf_d_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sunpkhi z2.d, z0.s +; CHECK-NEXT: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: scvtf z1.d, p0/m, z2.d +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_d_nxv4i1( %a) { +; CHECK-LABEL: scvtf_d_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p3.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: mov z0.d, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: scvtf z0.d, p2/m, z0.d +; CHECK-NEXT: scvtf z1.d, p2/m, z1.d +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +; UINT_TO_FP + +; Split operand +define @ucvtf_s_nxv4i64( %a) { +; CHECK-LABEL: ucvtf_s_nxv4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_h_nxv8i64( %a) { +; CHECK-LABEL: ucvtf_h_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z3.h, p0/m, z3.d +; CHECK-NEXT: ucvtf z2.h, p0/m, z2.d +; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +; Split result +define @ucvtf_d_nxv4i32( %a) { +; CHECK-LABEL: ucvtf_d_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_d_nxv4i1( %a) { +; CHECK-LABEL: ucvtf_d_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p3.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: mov z0.d, p3/z, #1 // =0x1 +; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ucvtf z0.d, p2/m, z0.d +; CHECK-NEXT: ucvtf z1.d, p2/m, z1.d +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} From 38f625d0d1360b035271422bab922d22ed04d79a Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 1 Oct 2020 10:47:02 +0100 Subject: [PATCH 254/544] [ARM][LowOverheadLoops] Adjust Start insertion. Try to move the insertion point to become the terminator of the block, usually the preheader. Differential Revision: https://reviews.llvm.org/D88638 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 63 ++++++++----------- .../lstp-insertion-position.mir | 41 ++++-------- .../LowOverheadLoops/mov-after-dlstp.mir | 15 ++--- .../Thumb2/LowOverheadLoops/mov-operand.ll | 11 ++-- .../move-def-before-start.mir | 23 ++----- .../LowOverheadLoops/move-start-after-def.mir | 23 ++----- .../Thumb2/LowOverheadLoops/reductions.ll | 4 +- .../CodeGen/Thumb2/mve-float16regloops.ll | 2 +- .../CodeGen/Thumb2/mve-float32regloops.ll | 6 +- llvm/test/CodeGen/Thumb2/mve-fma-loops.ll | 4 +- .../Thumb2/mve-gather-scatter-optimisation.ll | 2 +- .../Thumb2/mve-gather-scatter-ptr-address.ll | 2 +- .../Thumb2/mve-gather-scatter-tailpred.ll | 30 +++------ llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 10 +-- .../test/CodeGen/Thumb2/mve-pred-threshold.ll | 4 +- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll | 14 ++--- 16 files changed, 92 insertions(+), 162 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index cd9c38752ad23..ac787a1674ab7 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -644,47 +644,10 @@ bool LowOverheadLoop::ValidateTailPredicate() { return false; } - // The element count register maybe defined after InsertPt, in which case we - // need to try to move either InsertPt or the def so that the [w|d]lstp can - // use the value. - - if (StartInsertPt != StartInsertBB->end() && - !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) { - if (auto *ElemDef = RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) { - if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) { - ElemDef->removeFromParent(); - StartInsertBB->insert(StartInsertPt, ElemDef); - LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " - << *ElemDef); - } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) { - StartInsertPt->removeFromParent(); - StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), - &*StartInsertPt); - LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); - } else { - // If we fail to move an instruction and the element count is provided - // by a mov, use the mov operand if it will have the same value at the - // insertion point - MachineOperand Operand = ElemDef->getOperand(1); - if (isMovRegOpcode(ElemDef->getOpcode()) && - RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) == - RDA.getUniqueReachingMIDef(&*StartInsertPt, Operand.getReg())) { - TPNumElements = Operand; - NumElements = TPNumElements.getReg(); - } else { - LLVM_DEBUG(dbgs() - << "ARM Loops: Unable to move element count to loop " - << "start instruction.\n"); - return false; - } - } - } - } - // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect // world the [w|d]lstp instruction would be last instruction in the preheader // and so it would only affect instructions within the loop body. But due to - // scheduling, and/or the logic in this pass (above), the insertion point can + // scheduling, and/or the logic in this pass, the insertion point can // be moved earlier. So if the Loop Start isn't the last instruction in the // preheader, and if the initial element count is smaller than the vector // width, the Loop Start instruction will immediately generate one or more @@ -1091,12 +1054,36 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { return true; }; + // We know that we can define safely LR at InsertPt, but maybe we could + // push the insertion point to later on in the basic block. + auto TryAdjustInsertionPoint = [](MachineBasicBlock::iterator &InsertPt, + MachineInstr *Start, + ReachingDefAnalysis &RDA) { + + MachineBasicBlock *MBB = InsertPt->getParent(); + MachineBasicBlock::iterator FirstNonTerminator = + MBB->getFirstTerminator(); + unsigned CountReg = Start->getOperand(0).getReg(); + + // Get the latest possible insertion point and check whether the semantics + // will be maintained if Start was inserted there. + if (FirstNonTerminator == MBB->end()) { + if (RDA.isReachingDefLiveOut(Start, CountReg) && + RDA.isReachingDefLiveOut(Start, ARM::LR)) + InsertPt = FirstNonTerminator; + } else if (RDA.hasSameReachingDef(Start, &*FirstNonTerminator, CountReg) && + RDA.hasSameReachingDef(Start, &*FirstNonTerminator, ARM::LR)) + InsertPt = FirstNonTerminator; + }; + if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, ToRemove)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; return; } + + TryAdjustInsertionPoint(StartInsertPt, Start, RDA); Revert = !ValidateRanges(Start, End, BBUtils, ML); CannotTailPredicate = !ValidateTailPredicate(); } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir index 3e7c87de0282c..e5131fd4e1b41 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir @@ -153,25 +153,17 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 - ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg - ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4) - ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: liveins: $lr, $q1, $r0, $r1 + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1315, align 4) + ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q1 ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg @@ -285,27 +277,18 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg - ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) - ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: liveins: $lr, $q1, $r0, $r1 + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q1 ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index 94e3e26c819d6..5bafc295a3eff 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -163,17 +163,14 @@ body: | ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg + ; CHECK: $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: bb.1.do.body.i: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12 - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4) - ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0 - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12 + ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 ; CHECK: bb.2.arm_mean_f32_mve.exit: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 1404075dce901..12c6858c961b5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -17,16 +17,13 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: add.w lr, r12, r3, lsr #2 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r4, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_1: @ %do.body.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q1, [r12], #16 -; CHECK-NEXT: vaddt.f32 q0, q0, q1 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: vldrw.u32 q1, [r12], #16 +; CHECK-NEXT: vadd.f32 q0, q0, q1 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 ; CHECK-NEXT: vadd.f32 s0, s3, s3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir index ea3589f48fdb7..005524b878894 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir @@ -117,32 +117,21 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir index 0295acb67962d..f7e0e699c75a1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir @@ -117,32 +117,21 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index b5cac5d6a3cf8..a0cdb822b370f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -451,9 +451,9 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r6, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: vmov.32 q0[0], r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_5: @ %vector.body46 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -686,8 +686,8 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) { ; CHECK-NEXT: mla r2, r4, r3, r2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_6: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index d364eb97fff72..f3db06e571caf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1156,8 +1156,8 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca ; CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index a43f564951e93..6f9b001ea992b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1116,8 +1116,8 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -1436,9 +1436,9 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(%struct.arm_biqu ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: vmov.f32 s6, s12 ; CHECK-NEXT: vmov.f32 s10, s14 -; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: vmov.f32 s7, s12 ; CHECK-NEXT: vmov.f32 s11, s14 +; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: .LBB17_3: @ Parent Loop BB17_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrw.u32 q4, [r1, q0, uxtw #2] @@ -1589,8 +1589,8 @@ define arm_aapcs_vfpcc void @fms(float* nocapture readonly %pSrc1, float* nocapt ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB18_3 Depth 2 ; CHECK-NEXT: ldr r4, [r2] -; CHECK-NEXT: dls lr, r5 ; CHECK-NEXT: vdup.32 q0, r4 +; CHECK-NEXT: dls lr, r5 ; CHECK-NEXT: .LBB18_3: @ %while.body ; CHECK-NEXT: @ Parent Loop BB18_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll index 86cbec661f1f5..68ebeaa830cb2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -265,9 +265,9 @@ define arm_aapcs_vfpcc void @fmss1(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: eor r12, r4, #-2147483648 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 @@ -529,9 +529,9 @@ define arm_aapcs_vfpcc void @fms1(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: eor r12, r4, #-2147483648 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index bba302d7fbcc0..d158c85e401b8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -709,12 +709,12 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r0, [sp, #112] ; CHECK-NEXT: sub.w lr, r11, r5 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mla r3, r0, r5, r1 ; CHECK-NEXT: add r5, r9 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: add.w r5, r0, r5, lsl #1 ; CHECK-NEXT: add.w r3, r6, r3, lsl #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB10_14: @ %for.body8.us.us ; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 ; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll index bfc64b8c8e261..030fb3b91cf8f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll @@ -556,8 +556,8 @@ define void @ptr_iv_v8f16_mult(half* noalias nocapture readonly %A, half* noalia ; CHECK-NEXT: vmov.f16 r1, s0 ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: adr r2, .LCPI9_1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q2, [r0, q0, uxtw #1] diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index 4054b75edd0ed..a4a67512b7199 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -11,9 +11,9 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: movw lr, #1250 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 @@ -231,17 +231,11 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: beq.w .LBB3_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: adr r7, .LCPI3_5 -; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: vmov.i32 q0, #0x8000 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: adr r6, .LCPI3_4 ; CHECK-NEXT: adr r5, .LCPI3_3 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: adr r4, .LCPI3_2 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #160] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r7] ; CHECK-NEXT: adr.w r8, .LCPI3_1 @@ -274,22 +268,18 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload -; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u32 q4, [r0, q0] +; CHECK-NEXT: vldrb.u32 q4, [r0, q0] ; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u32 q7, [r0, q0] +; CHECK-NEXT: vldrb.u32 q7, [r0, q0] ; CHECK-NEXT: vldrw.u32 q0, [sp, #144] @ 16-byte Reload ; CHECK-NEXT: vldrw.u32 q5, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.i32 q6, q7, q0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u32 q1, [r0, q5] +; CHECK-NEXT: vldrb.u32 q1, [r0, q5] ; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload ; CHECK-NEXT: vmul.i32 q3, q4, q0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload @@ -320,14 +310,12 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload ; CHECK-NEXT: vshr.u32 q1, q1, #16 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.32 q1, [r1, q0] +; CHECK-NEXT: vstrb.32 q1, [r1, q0] ; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload -; CHECK-NEXT: vpstt -; CHECK-NEXT: vstrbt.32 q2, [r1, q0] -; CHECK-NEXT: vstrbt.32 q6, [r1, q5] +; CHECK-NEXT: vstrb.32 q2, [r1, q0] +; CHECK-NEXT: vstrb.32 q6, [r1, q5] ; CHECK-NEXT: adds r1, #12 -; CHECK-NEXT: le lr, .LBB3_2 +; CHECK-NEXT: letp lr, .LBB3_2 ; CHECK-NEXT: .LBB3_3: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #216 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 0f3e893fd8017..d67ccd9393cc4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -257,13 +257,13 @@ define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %inpu ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r7, r11, r3, r1 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB2_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -425,13 +425,13 @@ define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readon ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r7, r11, r3, r1 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB3_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -735,13 +735,13 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: ldr.w r11, [sp, #88] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r3, r9, r11, r0 ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: .LBB5_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -907,13 +907,13 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: ldr.w r11, [sp, #88] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r3, r9, r11, r0 ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: .LBB6_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -1120,7 +1120,6 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf ; CHECK-NEXT: ldr.w r1, [r1, r10, lsl #2] ; CHECK-NEXT: ldrd r6, r7, [r0, #32] ; CHECK-NEXT: ldr.w r3, [r3, r10, lsl #2] -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: add.w r6, r6, r2, lsl #2 ; CHECK-NEXT: add.w r12, r12, r1, lsl #2 ; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload @@ -1129,6 +1128,7 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf ; CHECK-NEXT: add.w r1, r2, r11, lsl #2 ; CHECK-NEXT: add.w r8, r1, r11, lsl #2 ; CHECK-NEXT: add.w r9, r8, r11, lsl #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_7: @ Parent Loop BB7_3 Depth=1 ; CHECK-NEXT: @ Parent Loop BB7_6 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll index 12561d560309a..35e02faa14e01 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll @@ -187,8 +187,8 @@ define arm_aapcs_vfpcc void @thresh_f32(float* %data, i16 zeroext %N, float %T) ; CHECK-NEXT: add.w lr, r2, r1, lsr #2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: eor r2, r1, #-2147483648 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] @@ -480,8 +480,8 @@ define arm_aapcs_vfpcc void @thresh_rev_f32(float* %data, i16 zeroext %N, float ; CHECK-NEXT: add.w lr, r2, r1, lsr #2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: eor r2, r1, #-2147483648 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index f586857f289f7..fdaea92c4329c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -36,8 +36,8 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 ; CHECK-NEXT: mov.w r10, #-1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrd r4, r5, [r0] @@ -256,10 +256,10 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: adr r7, .LCPI1_1 ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r7] -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: mvn r9, #-2147483648 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 @@ -544,8 +544,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vdup.32 q1, r7 ; CHECK-NEXT: mov.w r12, #-1 ; CHECK-NEXT: mvn r8, #-2147483648 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload @@ -773,8 +773,8 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: add.w r11, r1, r5, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 ; CHECK-NEXT: add.w r12, r0, r5, lsl #2 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r5, [sp] @ 4-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrd r4, r9, [r0] @@ -1617,8 +1617,8 @@ define arm_aapcs_vfpcc void @ssatmul_8t_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q5, [sp] @ 16-byte Reload @@ -2842,7 +2842,6 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n ; CHECK-NEXT: vmov.i8 q2, #0x0 ; CHECK-NEXT: add.w lr, lr, r12, lsr #4 ; CHECK-NEXT: sub.w r12, r3, #1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI18_2 @@ -2854,6 +2853,7 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n ; CHECK-NEXT: vmov.i8 q3, #0xff ; CHECK-NEXT: vldrw.u32 q6, [r4] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB18_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload @@ -3142,7 +3142,6 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8* ; CHECK-NEXT: vmov.i8 q2, #0x0 ; CHECK-NEXT: add.w lr, lr, r12, lsr #4 ; CHECK-NEXT: sub.w r12, r3, #1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI19_2 @@ -3154,6 +3153,7 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8* ; CHECK-NEXT: vmov.i8 q3, #0xff ; CHECK-NEXT: vldrw.u32 q6, [r4] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB19_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload From 69acdfe075fa8eb18781f88f4d0cd1ea40fa6e48 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 1 Oct 2020 15:58:31 +0700 Subject: [PATCH 255/544] [SCEV] Prove implicaitons via AddRec start If we know that some predicate is true for AddRec and an invariant (w.r.t. this AddRec's loop), this fact is, in particular, true on the first iteration. We can try to prove the facts we need using the start value. The motivating example is proving things like ``` isImpliedCondOperands(>=, X, 0, {X,+,-1}, 0} ``` Differential Revision: https://reviews.llvm.org/D88208 Reviewed By: reames --- llvm/include/llvm/Analysis/ScalarEvolution.h | 31 +++++- llvm/lib/Analysis/ScalarEvolution.cpp | 103 +++++++++++++----- .../Analysis/ScalarEvolutionTest.cpp | 32 ++++++ 3 files changed, 134 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index febca473776aa..158257a5aa9a1 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1677,23 +1677,30 @@ class ScalarEvolution { getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const; /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the given FoundCondValue value evaluates to true. + /// whenever the given FoundCondValue value evaluates to true in given + /// Context. If Context is nullptr, then the found predicate is true + /// everywhere. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse); + const Value *FoundCondValue, bool Inverse, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is - /// true. + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, - const SCEV *FoundRHS); + const SCEV *FoundRHS, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is - /// true. + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS); + const SCEV *FoundRHS, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is @@ -1740,6 +1747,18 @@ class ScalarEvolution { const SCEV *FoundLHS, const SCEV *FoundRHS); + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to weaken the known condition basing on fact that + /// FoundLHS is an AddRec. + bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, + const Instruction *Context); + /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e51b31673105c..a3e454fefcf0f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9549,15 +9549,16 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, // Try to prove (Pred, LHS, RHS) using isImpliedCond. auto ProveViaCond = [&](const Value *Condition, bool Inverse) { - if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) + const Instruction *Context = &BB->front(); + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) return true; if (ProvingStrictComparison) { if (!ProvedNonStrictComparison) - ProvedNonStrictComparison = - isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); + ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS, + Condition, Inverse, Context); if (!ProvedNonEquality) - ProvedNonEquality = - isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); + ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, + Condition, Inverse, Context); if (ProvedNonStrictComparison && ProvedNonEquality) return true; } @@ -9623,7 +9624,8 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse) { + const Value *FoundCondValue, bool Inverse, + const Instruction *Context) { if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; @@ -9634,12 +9636,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (const BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } } @@ -9657,14 +9663,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, - const SCEV *FoundLHS, - const SCEV *FoundRHS) { + const SCEV *FoundLHS, const SCEV *FoundRHS, + const Instruction *Context) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { @@ -9708,16 +9714,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check whether swapping the found predicate makes it the same as the // desired predicate. if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { if (isa(RHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); else - return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), - RHS, LHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS, + LHS, FoundLHS, FoundRHS, Context); } // Unsigned comparison is the same as signed comparison when both the operands @@ -9725,7 +9731,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (CmpInst::isUnsigned(FoundPred) && CmpInst::getSignedPredicate(FoundPred) == Pred && isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && @@ -9762,8 +9768,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_UGE: // We know V `Pred` SharperMin. If this implies LHS `Pred` // RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, - getConstant(SharperMin))) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), + Context)) return true; LLVM_FALLTHROUGH; @@ -9778,7 +9784,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // // If V `Pred` Min implies LHS `Pred` RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), + Context)) return true; break; @@ -9786,14 +9793,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(SharperMin))) + LHS, V, getConstant(SharperMin), Context)) return true; LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(Min))) + LHS, V, getConstant(Min), Context)) return true; break; @@ -9807,11 +9814,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) - if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) - if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, + Context)) return true; // Otherwise assume the worst. @@ -9890,6 +9898,44 @@ Optional ScalarEvolution::computeConstantDifference(const SCEV *More, return None; } +bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { + // Try to recognize the following pattern: + // + // FoundRHS = ... + // ... + // loop: + // FoundLHS = {Start,+,W} + // context_bb: // Basic block from the same loop + // known(Pred, FoundLHS, FoundRHS) + // + // If some predicate is known in the context of a loop, it is also known on + // each iteration of this loop, including the first iteration. Therefore, in + // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to + // prove the original pred using this fact. + if (!Context) + return false; + // Make sure AR varies in the context block. + if (auto *AR = dyn_cast(FoundLHS)) { + if (!AR->getLoop()->contains(Context->getParent())) + return false; + if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); + } + + if (auto *AR = dyn_cast(FoundRHS)) { + if (!AR->getLoop()->contains(Context)) + return false; + if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); + } + + return false; +} + bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { @@ -10080,13 +10126,18 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS) { + const SCEV *FoundRHS, + const Instruction *Context) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; + if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, + Context)) + return true; + return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS) || // ~x < ~y --> x > y diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index ff33495f22711..e5ffc21fb6646 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1251,4 +1251,36 @@ TEST_F(ScalarEvolutionsTest, SCEVgetExitLimitForGuardedLoop) { }); } +TEST_F(ScalarEvolutionsTest, ImpliedViaAddRecStart) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString( + "define void @foo(i32* %p) { " + "entry: " + " %x = load i32, i32* %p, !range !0 " + " br label %loop " + "loop: " + " %iv = phi i32 [ %x, %entry], [%iv.next, %backedge] " + " %ne.check = icmp ne i32 %iv, 0 " + " br i1 %ne.check, label %backedge, label %exit " + "backedge: " + " %iv.next = add i32 %iv, -1 " + " br label %loop " + "exit:" + " ret void " + "} " + "!0 = !{i32 0, i32 2147483647}", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *X = SE.getSCEV(getInstructionByName(F, "x")); + auto *Context = getInstructionByName(F, "iv.next"); + EXPECT_TRUE(SE.isKnownPredicateAt(ICmpInst::ICMP_NE, X, + SE.getZero(X->getType()), Context)); + }); +} + } // end namespace llvm From a81b938b6dee0e1ed4dd44e7d59325d0aa4774cc Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 1 Oct 2020 06:57:35 -0400 Subject: [PATCH 256/544] [mlir][Linalg] Fix ASAN bug ``` LinalgTilingOptions &setTileSizes(ValueRange ts) ``` makes it all too easy to create stack-use-after-return errors. In particular, c694588fc52a8845174fee06ad0bcfa338e87816 introduced one such issue. Instead just take a copy in the lambda and be done with it. --- mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index a7f8c31e22643..e47dafc9bf52b 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -326,9 +326,9 @@ struct LinalgTilingOptions { /// Set the `tileSizeComputationFunction` to return the values `ts`. The /// values must not fold away when tiling. Otherwise, use a more robust /// `tileSizeComputationFunction`. - LinalgTilingOptions &setTileSizes(ValueRange ts) { - tileSizeComputationFunction = [&](OpBuilder &, Operation *) { - return SmallVector(ts.begin(), ts.end()); + LinalgTilingOptions &setTileSizes(SmallVector ts) { + tileSizeComputationFunction = [=](OpBuilder &, Operation *) { + return ts; }; return *this; } From fcf70e1e3b1d57d5fde6b99d0188d1b1774429af Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 1 Oct 2020 11:06:55 +0100 Subject: [PATCH 257/544] [SVE][CodeGen] Lower scalable fp_extend & fp_round operations This patch adds FP_EXTEND_MERGE_PASSTHRU & FP_ROUND_MERGE_PASSTHRU ISD nodes, used to lower scalable vector fp_extend/fp_round operations. fp_round has an additional argument, the 'trunc' flag, which is an integer of zero or one. This also fixes a warning introduced by the new tests added to sve-split-fcvt.ll, resulting from an implicit TypeSize -> uint64_t cast in SplitVecOp_FP_ROUND. Reviewed By: sdesmalen, paulwalker-arm Differential Revision: https://reviews.llvm.org/D88321 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 2 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- .../Target/AArch64/AArch64ISelLowering.cpp | 15 +- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../lib/Target/AArch64/AArch64InstrFormats.td | 7 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 88 +++++++---- llvm/lib/Target/AArch64/SVEInstrFormats.td | 26 ++++ llvm/test/CodeGen/AArch64/sve-fcvt.ll | 88 +++++++++++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll | 146 ++++++++++++++++++ 9 files changed, 340 insertions(+), 38 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 356eb1ce0964b..0b3edc3416859 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2715,7 +2715,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b9362f1e762d3..eef467d116b7f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4613,8 +4613,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d8072dbb856e4..fb70b2d801da0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -183,6 +183,8 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::FROUND_MERGE_PASSTHRU: case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: case AArch64ISD::FTRUNC_MERGE_PASSTHRU: + case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: + case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::FCVTZU_MERGE_PASSTHRU: @@ -1052,6 +1054,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::FSQRT, VT, Custom); + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::FP_ROUND, VT, Custom); } setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); @@ -1580,6 +1584,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) @@ -2908,6 +2914,9 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); RTLIB::Libcall LC; @@ -2918,6 +2927,9 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); + bool IsStrict = Op->isStrictFPOpcode(); SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); EVT SrcVT = SrcVal.getValueType(); @@ -16003,7 +16015,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SmallVector Operands = {Pg}; for (const SDValue &V : Op->op_values()) { - assert((isa(V) || V.getValueType().isScalableVector()) && + assert((!V.getValueType().isVector() || + V.getValueType().isScalableVector()) && "Only scalable vectors are supported!"); Operands.push_back(V); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 51391d309b404..1b8f62e427dbb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -105,6 +105,8 @@ enum NodeType : unsigned { FROUNDEVEN_MERGE_PASSTHRU, FSQRT_MERGE_PASSTHRU, FTRUNC_MERGE_PASSTHRU, + FP_ROUND_MERGE_PASSTHRU, + FP_EXTEND_MERGE_PASSTHRU, UINT_TO_FP_MERGE_PASSTHRU, SINT_TO_FP_MERGE_PASSTHRU, FCVTZU_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 61155087cbe28..68dc477567a5d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -914,6 +914,13 @@ def imm0_1 : Operand, ImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf ]>; +def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, + SDTCVecEltisVT<1,i1> +]>; + +def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; +def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; @@ -1178,6 +1185,11 @@ multiclass sve_prefetch; // Extract subvectors from FP SVE vectors + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), (UUNPKLO_ZZ_S ZPR:$Zs)>; def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), @@ -1400,40 +1412,48 @@ multiclass sve_prefetch; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; - defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, null_frag, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, null_frag, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>; - defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, null_frag, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, null_frag, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>; - defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, null_frag, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, null_frag, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. + // This is ignored by the pattern below where it is matched by (i64 timm0_1) + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; // Floating-point -> signed integer def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index d0226a73d87d2..45a712c897a44 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -318,6 +318,13 @@ class SVE_1_Op_Passthru_Pat; +// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the +// type of rounding. This is matched by timm0_1 in pattern below and ignored. +class SVE_1_Op_Passthru_Round_Pat +: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + class SVE_1_Op_Imm_OptLsl_Reverse_Pat : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -2299,6 +2306,25 @@ multiclass sve_fp_2op_p_zd opc, string asm, def : SVE_1_Op_Passthru_Pat(NAME)>; } +multiclass sve_fp_2op_p_zdr opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator int_op, + SDPatternOperator ir_op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd; + + // convert vt1 to a packed type for the intrinsic patterns + defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, + !eq(!cast(vt1), "nxv4f16"): nxv8f16, + !eq(!cast(vt1), "nxv2f32"): nxv4f32, + 1 : vt1); + + def : SVE_3_Op_Pat(NAME)>; + + def : SVE_1_Op_Passthru_Round_Pat(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll index 9b980ac25c108..1b395806755d1 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -5,6 +5,94 @@ ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning +; +; FP_EXTEND +; + +define @fcvts_nxv2f16( %a) { +; CHECK-LABEL: fcvts_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvts_nxv4f16( %a) { +; CHECK-LABEL: fcvts_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv2f16( %a) { +; CHECK-LABEL: fcvtd_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv2f32( %a) { +; CHECK-LABEL: fcvtd_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +; +; FP_ROUND +; + +define @fcvth_nxv2f32( %a) { +; CHECK-LABEL: fcvth_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv4f32( %a) { +; CHECK-LABEL: fcvth_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv2f64( %a) { +; CHECK-LABEL: fcvth_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv2f64( %a) { +; CHECK-LABEL: fcvts_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + ; ; FP_TO_SINT ; diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll index 41b3e0ee13e16..6f608c830cfe5 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -5,6 +5,152 @@ ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning +; FP_EXTEND + +define @fcvts_nxv8f16( %a) { +; CHECK-LABEL: fcvts_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: fcvt z1.s, p0/m, z2.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv4f16( %a) { +; CHECK-LABEL: fcvtd_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: fcvt z1.d, p0/m, z2.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv8f16( %a) { +; CHECK-LABEL: fcvtd_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z2.h +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h +; CHECK-NEXT: fcvt z2.d, p0/m, z3.h +; CHECK-NEXT: fcvt z3.d, p0/m, z4.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv4f32( %a) { +; CHECK-LABEL: fcvtd_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: fcvt z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv8f32( %a) { +; CHECK-LABEL: fcvtd_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z1.s +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: fcvt z0.d, p0/m, z2.s +; CHECK-NEXT: fcvt z1.d, p0/m, z3.s +; CHECK-NEXT: fcvt z2.d, p0/m, z4.s +; CHECK-NEXT: fcvt z3.d, p0/m, z5.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +; FP_ROUND + +define @fcvth_nxv8f32( %a) { +; CHECK-LABEL: fcvth_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z1.h, p0/m, z1.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv8f64( %a) { +; CHECK-LABEL: fcvth_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z3.h, p0/m, z3.d +; CHECK-NEXT: fcvt z2.h, p0/m, z2.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv4f64( %a) { +; CHECK-LABEL: fcvth_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv4f64( %a) { +; CHECK-LABEL: fcvts_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv8f64( %a) { +; CHECK-LABEL: fcvts_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: fcvt z3.s, p0/m, z3.d +; CHECK-NEXT: fcvt z2.s, p0/m, z2.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + ; FP_TO_SINT ; Split operand From ef4e971e5e18ae796466623df8f26265ba6bdfb5 Mon Sep 17 00:00:00 2001 From: Andrew Paverd Date: Thu, 1 Oct 2020 10:07:40 +0100 Subject: [PATCH 258/544] [CFGuard] Add address-taken IAT tables and delay-load support This patch adds support for creating Guard Address-Taken IAT Entry Tables (.giats$y sections) in object files, matching the behavior of MSVC. These contain lists of address-taken imported functions, which are used by the linker to create the final GIATS table. Additionally, if any DLLs are delay-loaded, the linker must look through the .giats tables and add the respective load thunks of address-taken imports to the GFIDS table, as these are also valid call targets. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D87544 --- lld/COFF/DLL.cpp | 10 ++ lld/COFF/ICF.cpp | 2 +- lld/COFF/InputFiles.cpp | 2 + lld/COFF/InputFiles.h | 7 +- lld/COFF/Symbols.h | 7 ++ lld/COFF/Writer.cpp | 46 ++++++- lld/test/COFF/giats.s | 117 ++++++++++++++++++ llvm/include/llvm/MC/MCObjectFileInfo.h | 2 + llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 47 +++++-- llvm/lib/MC/MCObjectFileInfo.cpp | 5 + llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll | 22 ++++ llvm/tools/llvm-readobj/COFFDumper.cpp | 10 ++ 12 files changed, 259 insertions(+), 18 deletions(-) create mode 100644 lld/test/COFF/giats.s create mode 100644 llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 50301ad91b1d5..e88a6b1bffb06 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -19,6 +19,7 @@ #include "DLL.h" #include "Chunks.h" +#include "SymbolTable.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" @@ -653,9 +654,18 @@ void DelayLoadContents::create(Defined *h) { auto *c = make(extName, 0); names.push_back(make(c)); hintNames.push_back(c); + // Add a syntentic symbol for this load thunk, using the "__imp_load" + // prefix, in case this thunk needs to be added to the list of valid + // call targets for Control Flow Guard. + StringRef symName = saver.save("__imp_load_" + extName); + s->loadThunkSym = + cast(symtab->addSynthetic(symName, t)); } } thunks.push_back(tm); + StringRef tmName = + saver.save("__tailMerge_" + syms[0]->getDLLName().lower()); + symtab->addSynthetic(tmName, tm); // Terminate with null values. addresses.push_back(make(8)); names.push_back(make(8)); diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index 1b33634b63d6a..386f861fb27fb 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -131,7 +131,7 @@ bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { auto considerForICF = [](const SectionChunk &assoc) { StringRef Name = assoc.getSectionName(); return !(Name.startswith(".debug") || Name == ".gfids$y" || - Name == ".gljmp$y"); + Name == ".giats$y" || Name == ".gljmp$y"); }; auto ra = make_filter_range(a->children(), considerForICF); auto rb = make_filter_range(b->children(), considerForICF); diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index aaa00d0f7279a..37f66131620e6 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -280,6 +280,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber, debugChunks.push_back(c); else if (name == ".gfids$y") guardFidChunks.push_back(c); + else if (name == ".giats$y") + guardIATChunks.push_back(c); else if (name == ".gljmp$y") guardLJmpChunks.push_back(c); else if (name == ".sxdata") diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 0a5114b165f0c..26a6e5b7b70d9 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -144,6 +144,7 @@ class ObjFile : public InputFile { ArrayRef getDebugChunks() { return debugChunks; } ArrayRef getSXDataChunks() { return sxDataChunks; } ArrayRef getGuardFidChunks() { return guardFidChunks; } + ArrayRef getGuardIATChunks() { return guardIATChunks; } ArrayRef getGuardLJmpChunks() { return guardLJmpChunks; } ArrayRef getSymbols() { return symbols; } @@ -283,9 +284,11 @@ class ObjFile : public InputFile { // 32-bit x86. std::vector sxDataChunks; - // Chunks containing symbol table indices of address taken symbols and longjmp - // targets. These are not linked into the final binary when /guard:cf is set. + // Chunks containing symbol table indices of address taken symbols, address + // taken IAT entries, and longjmp targets. These are not linked into the + // final binary when /guard:cf is set. std::vector guardFidChunks; + std::vector guardIATChunks; std::vector guardLJmpChunks; // This vector contains a list of all symbols defined or referenced by this diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 1da4df3669662..370f72745900d 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -343,6 +343,13 @@ class DefinedImportData : public Defined { uint16_t getOrdinal() { return file->hdr->OrdinalHint; } ImportFile *file; + + // This is a pointer to the synthetic symbol associated with the load thunk + // for this symbol that will be called if the DLL is delay-loaded. This is + // needed for Control Flow Guard because if this DefinedImportData symbol is a + // valid call target, the corresponding load thunk must also be marked as a + // valid call target. + DefinedSynthetic *loadThunkSym; }; // This class represents a symbol for a jump table entry which jumps diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index d1081b008ea40..b437a681483ff 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -227,6 +227,9 @@ class Writer { void markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols); + void getSymbolsFromSections(ObjFile *file, + ArrayRef symIdxChunks, + std::vector &symbols); void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym); void setSectionPermissions(); @@ -605,8 +608,9 @@ void Writer::run() { createImportTables(); createSections(); - createMiscChunks(); appendImportThunks(); + // Import thunks must be added before the Control Flow Guard tables are added. + createMiscChunks(); createExportTable(); mergeSections(); removeUnusedSections(); @@ -1618,6 +1622,8 @@ static void markSymbolsWithRelocations(ObjFile *file, // table. void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; + SymbolRVASet giatsRVASet; + std::vector giatsSymbols; SymbolRVASet longJmpTargets; for (ObjFile *file : ObjFile::instances) { // If the object was compiled with /guard:cf, the address taken symbols @@ -1627,6 +1633,8 @@ void Writer::createGuardCFTables() { // possibly address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); + markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet); + getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); @@ -1641,6 +1649,16 @@ void Writer::createGuardCFTables() { for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); + // For each entry in the .giats table, check if it has a corresponding load + // thunk (e.g. because the DLL that defines it will be delay-loaded) and, if + // so, add the load thunk to the address taken (.gfids) table. + for (Symbol *s : giatsSymbols) { + if (auto *di = dyn_cast(s)) { + if (di->loadThunkSym) + addSymbolToRVASet(addressTakenSyms, di->loadThunkSym); + } + } + // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) @@ -1649,6 +1667,10 @@ void Writer::createGuardCFTables() { maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); + // Add the Guard Address Taken IAT Entry Table (.giats). + maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table", + "__guard_iat_count"); + // Add the longjmp target table unless the user told us not to. if (config->guardCF == GuardCFLevel::Full) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", @@ -1665,11 +1687,11 @@ void Writer::createGuardCFTables() { } // Take a list of input sections containing symbol table indices and add those -// symbols to an RVA table. The challenge is that symbol RVAs are not known and +// symbols to a vector. The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. -void Writer::markSymbolsForRVATable(ObjFile *file, +void Writer::getSymbolsFromSections(ObjFile *file, ArrayRef symIdxChunks, - SymbolRVASet &tableSymbols) { + std::vector &symbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. @@ -1687,7 +1709,7 @@ void Writer::markSymbolsForRVATable(ObjFile *file, } // Read each symbol table index and check if that symbol was included in the - // final link. If so, add it to the table symbol set. + // final link. If so, add it to the vector of symbols. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); @@ -1699,12 +1721,24 @@ void Writer::markSymbolsForRVATable(ObjFile *file, } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) - addSymbolToRVASet(tableSymbols, cast(s)); + symbols.push_back(cast(s)); } } } } +// Take a list of input sections containing symbol table indices and add those +// symbols to an RVA table. +void Writer::markSymbolsForRVATable(ObjFile *file, + ArrayRef symIdxChunks, + SymbolRVASet &tableSymbols) { + std::vector syms; + getSymbolsFromSections(file, symIdxChunks, syms); + + for (Symbol *s : syms) + addSymbolToRVASet(tableSymbols, cast(s)); +} + // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. diff --git a/lld/test/COFF/giats.s b/lld/test/COFF/giats.s new file mode 100644 index 0000000000000..f18720f3692fa --- /dev/null +++ b/lld/test/COFF/giats.s @@ -0,0 +1,117 @@ +# REQUIRES: x86 + +# Make a DLL that exports exportfn1. +# RUN: yaml2obj %p/Inputs/export.yaml -o %basename_t-exp.obj +# RUN: lld-link /out:%basename_t-exp.dll /dll %basename_t-exp.obj /export:exportfn1 /implib:%basename_t-exp.lib + +# Make an object file that imports exportfn1. +# RUN: llvm-mc -triple x86_64-windows-msvc %s -filetype=obj -o %basename_t.obj + +# Check that the Guard address-taken IAT entry tables are propagated to the final executable. +# RUN: lld-link %basename_t.obj -guard:cf -entry:main -out:%basename_t-nodelay.exe %basename_t-exp.lib +# RUN: llvm-readobj --file-headers --coff-load-config %basename_t-nodelay.exe | FileCheck %s --check-prefix CHECK + +# CHECK: ImageBase: 0x140000000 +# CHECK: LoadConfig [ +# CHECK: GuardCFFunctionTable: 0x140002114 +# CHECK: GuardCFFunctionCount: 1 +# CHECK: GuardFlags: 0x10500 +# CHECK: GuardAddressTakenIatEntryTable: 0x140002118 +# CHECK: GuardAddressTakenIatEntryCount: 1 +# CHECK: ] +# CHECK: GuardFidTable [ +# CHECK-NEXT: 0x14000{{.*}} +# CHECK-NEXT: ] +# CHECK: GuardIatTable [ +# CHECK-NEXT: 0x14000{{.*}} +# CHECK-NEXT: ] + + +# Check that the additional load thunk symbol is added to the GFIDs table. +# RUN: lld-link %basename_t.obj -guard:cf -entry:main -out:%basename_t-delay.exe %basename_t-exp.lib -alternatename:__delayLoadHelper2=main -delayload:%basename_t-exp.dll +# RUN: llvm-readobj --file-headers --coff-load-config %basename_t-delay.exe | FileCheck %s --check-prefix DELAY-CHECK + +# DELAY-CHECK: ImageBase: 0x140000000 +# DELAY-CHECK: LoadConfig [ +# DELAY-CHECK: GuardCFFunctionTable: 0x140002114 +# DELAY-CHECK: GuardCFFunctionCount: 2 +# DELAY-CHECK: GuardFlags: 0x10500 +# DELAY-CHECK: GuardAddressTakenIatEntryTable: 0x14000211C +# DELAY-CHECK: GuardAddressTakenIatEntryCount: 1 +# DELAY-CHECK: ] +# DELAY-CHECK: GuardFidTable [ +# DELAY-CHECK-NEXT: 0x14000{{.*}} +# DELAY-CHECK-NEXT: 0x14000{{.*}} +# DELAY-CHECK-NEXT: ] +# DELAY-CHECK: GuardIatTable [ +# DELAY-CHECK-NEXT: 0x14000{{.*}} +# DELAY-CHECK-NEXT: ] + + +# This assembly is reduced from C code like: +# __declspec(noinline) +# void IndirectCall(BOOL (func)(HANDLE)) { +# (*func)(NULL); +# } +# int main(int argc, char** argv) { +# IndirectCall(exportfn1); +# } + + .text + .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +.set @feat.00, 2048 + .def IndirectCall; .scl 2; .type 32; .endef + .globl IndirectCall # -- Begin function IndirectCall + .p2align 4, 0x90 +IndirectCall: # @IndirectCall +# %bb.0: + subq $40, %rsp + movq %rcx, 32(%rsp) + movq 32(%rsp), %rax + movq %rax, %rdx # This would otherwise have be: movq __guard_dispatch_icall_fptr(%rip), %rdx + xorl %ecx, %ecx + callq *%rdx + nop + addq $40, %rsp + retq + # -- End function + .def main; .scl 2; .type 32; .endef + .globl main # -- Begin function main + .p2align 4, 0x90 +main: # @main +# %bb.0: + subq $56, %rsp + movq __imp_exportfn1(%rip), %rax + movq %rdx, 48(%rsp) + movl %ecx, 44(%rsp) + movq %rax, %rcx + callq IndirectCall + xorl %eax, %eax + addq $56, %rsp + retq + # -- End function + .section .gfids$y,"dr" + .section .giats$y,"dr" + .symidx __imp_exportfn1 + .section .gljmp$y,"dr" + +# Load configuration directory entry (winnt.h _IMAGE_LOAD_CONFIG_DIRECTORY64). +# The linker will define the __guard_* symbols. + .section .rdata,"dr" +.globl _load_config_used +_load_config_used: + .long 256 + .fill 124, 1, 0 + .quad __guard_fids_table + .quad __guard_fids_count + .long __guard_flags + .fill 12, 1, 0 + .quad __guard_iat_table + .quad __guard_iat_count + .quad __guard_longjmp_table + .quad __guard_fids_count + .fill 84, 1, 0 \ No newline at end of file diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 8c6bcba2332b1..316086833d975 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -215,6 +215,7 @@ class MCObjectFileInfo { MCSection *XDataSection = nullptr; MCSection *SXDataSection = nullptr; MCSection *GFIDsSection = nullptr; + MCSection *GIATsSection = nullptr; MCSection *GLJMPSection = nullptr; // XCOFF specific sections @@ -398,6 +399,7 @@ class MCObjectFileInfo { MCSection *getXDataSection() const { return XDataSection; } MCSection *getSXDataSection() const { return SXDataSection; } MCSection *getGFIDsSection() const { return GFIDsSection; } + MCSection *getGIATsSection() const { return GIATsSection; } MCSection *getGLJMPSection() const { return GLJMPSection; } // XCOFF specific sections diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 914308d9147e2..09bcf5cb25a21 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file contains support for writing the metadata for Windows Control Flow -// Guard, including address-taken functions, and valid longjmp targets. +// Guard, including address-taken functions and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -78,20 +78,49 @@ static bool isPossibleIndirectCallTarget(const Function *F) { return false; } +/// Returns true if this function should be added to the Guard Address Taken IAT +/// Entry Table (GIATs) instead of the Guard Function ID Table (GFIDs). +static bool isIATAddressTaken(const Function *F) { + if (F->hasDLLImportStorageClass()) { + return true; + } + return false; +} + void WinCFGuard::endModule() { const Module *M = Asm->MMI->getModule(); - std::vector Functions; - for (const Function &F : *M) - if (isPossibleIndirectCallTarget(&F)) - Functions.push_back(&F); - if (Functions.empty() && LongjmpTargets.empty()) + std::vector GFIDsEntries; + std::vector GIATsEntries; + for (const Function &F : *M) { + if (isPossibleIndirectCallTarget(&F)) { + if (isIATAddressTaken(&F)) { + // If the possible call target is reached via the IAT, add it to the + // GIATs table instead of the GFIDs table. + GIATsEntries.push_back(&F); + } else { + // Otherwise add it to the GFIDs table. + GFIDsEntries.push_back(&F); + } + } + } + + if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty()) return; + + // Emit the symbol index of each GFIDs entry to form the GFIDs table. auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); - for (const Function *F : Functions) + for (const Function *F : GFIDsEntries) OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); - // Emit the symbol index of each longjmp target. + // Emit the symbol index of each GIATs entry to form the GIATs table. + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); + for (const Function *F : GIATsEntries) { + OS.EmitCOFFSymbolIndex(Asm->OutContext.getOrCreateSymbol( + Twine("__imp_") + Asm->getSymbol(F)->getName())); + } + + // Emit the symbol index of each longjmp target to form the GLJMP table. OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { OS.EmitCOFFSymbolIndex(S); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index ae7345c4e05b9..eec2615974b57 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -752,6 +752,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); + GIATsSection = Ctx->getCOFFSection(".giats$y", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + GLJMPSection = Ctx->getCOFFSection(".gljmp$y", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, diff --git a/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll b/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll new file mode 100644 index 0000000000000..0ac436cc6add5 --- /dev/null +++ b/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s +; Control Flow Guard is currently only available on Windows + +declare dllimport i32 @target_func() + +; Test address-taken functions from imported DLLs are added to the +; Guard Address-Taken IAT Entry table (.giats). +define i32 @func_cf_giats() { +entry: + %func_ptr = alloca i32 ()*, align 8 + store i32 ()* @target_func, i32 ()** %func_ptr, align 8 + %0 = load i32 ()*, i32 ()** %func_ptr, align 8 + %1 = call i32 %0() + ret i32 %1 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"cfguard", i32 2} + +; CHECK-LABEL: .section .giats$y,"dr" +; CHECK-NEXT: .symidx __imp_target_func +; CHECK-NOT: .symidx \ No newline at end of file diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 22e27b3e5a29e..b4fb2e52cb199 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -67,6 +67,8 @@ struct LoadConfigTables { uint32_t GuardFlags = 0; uint64_t GuardFidTableVA = 0; uint64_t GuardFidTableCount = 0; + uint64_t GuardIatTableVA = 0; + uint64_t GuardIatTableCount = 0; uint64_t GuardLJmpTableVA = 0; uint64_t GuardLJmpTableCount = 0; }; @@ -804,6 +806,11 @@ void COFFDumper::printCOFFLoadConfig() { } } + if (Tables.GuardIatTableVA) { + ListScope LS(W, "GuardIatTable"); + printRVATable(Tables.GuardIatTableVA, Tables.GuardIatTableCount, 4); + } + if (Tables.GuardLJmpTableVA) { ListScope LS(W, "GuardLJmpTable"); printRVATable(Tables.GuardLJmpTableVA, Tables.GuardLJmpTableCount, 4); @@ -888,6 +895,9 @@ void COFFDumper::printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables) { Conf->GuardRFVerifyStackPointerFunctionPointer); W.printHex("HotPatchTableOffset", Conf->HotPatchTableOffset); + Tables.GuardIatTableVA = Conf->GuardAddressTakenIatEntryTable; + Tables.GuardIatTableCount = Conf->GuardAddressTakenIatEntryCount; + Tables.GuardLJmpTableVA = Conf->GuardLongJumpTargetTable; Tables.GuardLJmpTableCount = Conf->GuardLongJumpTargetCount; } From bc730b5e43ad4b7efeca977359271fa0eaa7ed45 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Oct 2020 12:49:59 +0100 Subject: [PATCH 259/544] [InstCombine] collectBitParts - use APInt directly to check for out of range bit shifts. NFCI. --- llvm/lib/Transforms/Utils/Local.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0dacb266a063d..550745673bd9f 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2872,10 +2872,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a logical shift by a constant, recurse then shift the result. if (I->isLogicalShift() && isa(I->getOperand(1))) { - unsigned BitShift = - cast(I->getOperand(1))->getLimitedValue(~0U); + const APInt &BitShift = cast(I->getOperand(1))->getValue(); + // Ensure the shift amount is defined. - if (BitShift > BitWidth) + if (BitShift.uge(BitWidth)) return Result; const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, @@ -2887,11 +2887,11 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // Perform the "shift" on BitProvenance. auto &P = Result->Provenance; if (I->getOpcode() == Instruction::Shl) { - P.erase(std::prev(P.end(), BitShift), P.end()); - P.insert(P.begin(), BitShift, BitPart::Unset); + P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end()); + P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset); } else { - P.erase(P.begin(), std::next(P.begin(), BitShift)); - P.insert(P.end(), BitShift, BitPart::Unset); + P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue())); + P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset); } return Result; From b272250221595b14c32db6721a0ae4e5f17ea4d2 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 1 Oct 2020 14:23:45 +0200 Subject: [PATCH 260/544] [lldb] Skip the flakey part of TestStopHookScripted on Linux This test seems to randomly fail on Linux machines. It's only one part of the test failing randomly, so let's just skip it instead of reverting the whole patch (again). --- .../test/API/commands/target/stop-hooks/TestStopHookScripted.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py index e650778fe8e3b..a17f7131d20f1 100644 --- a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py +++ b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py @@ -71,6 +71,8 @@ def test_stop_hooks_scripted_return_false(self): """Test that the returning False from a stop hook works""" self.do_test_auto_continue(True) + # Test is flakey on Linux. + @skipIfLinux def do_test_auto_continue(self, return_true): """Test that auto-continue works.""" # We set auto-continue to 1 but the stop hook only applies to step_out_of_me, From 4f13b999297140486b2faa1b5d8d7c768fb40dfb Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 30 Sep 2020 19:17:16 -0400 Subject: [PATCH 261/544] [libc++] Simplify how we re-export symbols from libc++abi Instead of managing two copies of the symbol lists, reuse the same list in libc++abi and libc++. Differential Revision: https://reviews.llvm.org/D88623 --- libcxx/lib/abi/CHANGELOG.TXT | 14 +++++++++++++ libcxx/lib/abi/x86_64-apple-darwin.v1.abilist | 4 ++++ libcxx/lib/libc++abi-exceptions.exp | 10 ---------- libcxx/lib/libc++abi-exceptions.sjlj.exp | 10 ---------- libcxx/lib/libc++abi-new-delete.exp | 20 ------------------- libcxx/src/CMakeLists.txt | 12 ----------- libcxxabi/src/CMakeLists.txt | 18 ++++++++++++----- 7 files changed, 31 insertions(+), 57 deletions(-) delete mode 100644 libcxx/lib/libc++abi-exceptions.exp delete mode 100644 libcxx/lib/libc++abi-exceptions.sjlj.exp delete mode 100644 libcxx/lib/libc++abi-new-delete.exp diff --git a/libcxx/lib/abi/CHANGELOG.TXT b/libcxx/lib/abi/CHANGELOG.TXT index 0672fb3f4441a..7ed2b7e28d407 100644 --- a/libcxx/lib/abi/CHANGELOG.TXT +++ b/libcxx/lib/abi/CHANGELOG.TXT @@ -12,6 +12,20 @@ Afterwards the ABI list should be updated to include the new changes. New entries should be added directly below the "Version" header. +------------ +Version 12.0 +------------ + +* XXXXXXX - [libc++] Simplify how we re-export symbols from libc++abi + + We re-export some symbols that were exported from libc++abi but not from + libc++. Exporting new symbols is not an ABI break. + + x86_64-apple-apple-darwin + ------------------------- + Symbol added: ___cxa_allocate_dependent_exception + Symbol added: ___cxa_free_dependent_exception + ------------ Version 10.0 ------------ diff --git a/libcxx/lib/abi/x86_64-apple-darwin.v1.abilist b/libcxx/lib/abi/x86_64-apple-darwin.v1.abilist index db06a4cc17cdb..e141feb0b6c1a 100644 --- a/libcxx/lib/abi/x86_64-apple-darwin.v1.abilist +++ b/libcxx/lib/abi/x86_64-apple-darwin.v1.abilist @@ -2431,3 +2431,7 @@ {'type': 'FUNC', 'is_defined': True, 'name': '__ZNSt3__131__arrive_barrier_algorithm_baseEPNS_24__barrier_algorithm_baseEh'} {'type': 'FUNC', 'is_defined': True, 'name': '__ZNSt3__132__destroy_barrier_algorithm_baseEPNS_24__barrier_algorithm_baseE'} {'type': 'FUNC', 'is_defined': True, 'name': '__ZNSt3__134__construct_barrier_algorithm_baseERl'} +{'type': 'U', 'is_defined': False, 'name': '___cxa_allocate_dependent_exception'} +{'type': 'U', 'is_defined': False, 'name': '___cxa_free_dependent_exception'} +{'type': 'I', 'is_defined': True, 'name': '___cxa_allocate_dependent_exception'} +{'type': 'I', 'is_defined': True, 'name': '___cxa_free_dependent_exception'} diff --git a/libcxx/lib/libc++abi-exceptions.exp b/libcxx/lib/libc++abi-exceptions.exp deleted file mode 100644 index 600a65ffead21..0000000000000 --- a/libcxx/lib/libc++abi-exceptions.exp +++ /dev/null @@ -1,10 +0,0 @@ -___cxa_allocate_exception -___cxa_begin_catch -___cxa_call_unexpected -___cxa_current_exception_type -___cxa_end_catch -___cxa_free_exception -___cxa_get_exception_ptr -___cxa_rethrow -___cxa_throw -___gxx_personality_v0 diff --git a/libcxx/lib/libc++abi-exceptions.sjlj.exp b/libcxx/lib/libc++abi-exceptions.sjlj.exp deleted file mode 100644 index 10073d615c244..0000000000000 --- a/libcxx/lib/libc++abi-exceptions.sjlj.exp +++ /dev/null @@ -1,10 +0,0 @@ -___cxa_allocate_exception -___cxa_begin_catch -___cxa_call_unexpected -___cxa_current_exception_type -___cxa_end_catch -___cxa_free_exception -___cxa_get_exception_ptr -___cxa_rethrow -___cxa_throw -___gxx_personality_sj0 diff --git a/libcxx/lib/libc++abi-new-delete.exp b/libcxx/lib/libc++abi-new-delete.exp deleted file mode 100644 index 9f74b0336efc8..0000000000000 --- a/libcxx/lib/libc++abi-new-delete.exp +++ /dev/null @@ -1,20 +0,0 @@ -__ZdaPv -__ZdaPvRKSt9nothrow_t -__ZdaPvSt11align_val_t -__ZdaPvSt11align_val_tRKSt9nothrow_t -__ZdaPvm -__ZdaPvmSt11align_val_t -__ZdlPv -__ZdlPvRKSt9nothrow_t -__ZdlPvSt11align_val_t -__ZdlPvSt11align_val_tRKSt9nothrow_t -__ZdlPvm -__ZdlPvmSt11align_val_t -__Znam -__ZnamRKSt9nothrow_t -__ZnamSt11align_val_t -__ZnamSt11align_val_tRKSt9nothrow_t -__Znwm -__ZnwmRKSt9nothrow_t -__ZnwmSt11align_val_t -__ZnwmSt11align_val_tRKSt9nothrow_t diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index dcd53c8a302af..fc9fc0e7bc27d 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -212,18 +212,6 @@ if (LIBCXX_ENABLE_SHARED) "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi.v${LIBCXX_LIBCPPABI_VERSION}.exp" "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp" "-Wl,-force_symbols_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/weak.exp") - - if (LIBCXX_ENABLE_EXCEPTIONS) - if ("${CMAKE_OSX_ARCHITECTURES}" MATCHES "^(armv6|armv7|armv7s)$") - target_link_libraries(cxx_shared PRIVATE "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi-exceptions.sjlj.exp") - else() - target_link_libraries(cxx_shared PRIVATE "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi-exceptions.exp") - endif() - endif() - - if (NOT LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS) - target_link_libraries(cxx_shared PRIVATE "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi-new-delete.exp") - endif() endif() # Generate a linker script in place of a libc++.so symlink. diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 85e3fda034f7c..e9e454082a054 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -204,19 +204,27 @@ if (LIBCXXABI_ENABLE_SHARED) # -exported_symbols_list is only available on Apple platforms if (APPLE) - target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/itanium-base.exp") + function(export_symbols file) + target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${file}") + endfunction() + function(reexport_symbols file) + export_symbols("${file}") + target_link_libraries(cxxabi_shared INTERFACE "-Wl,-reexported_symbols_list,${file}") + endfunction() + + export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/itanium-base.exp") if (LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS) - target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/new-delete.exp") + reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/new-delete.exp") endif() if (LIBCXXABI_ENABLE_EXCEPTIONS) - target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/exceptions.exp") + reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/exceptions.exp") if ("${CMAKE_OSX_ARCHITECTURES}" MATCHES "^(armv6|armv7|armv7s)$") - target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/personality-sjlj.exp") + reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/personality-sjlj.exp") else() - target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/personality-v0.exp") + reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/personality-v0.exp") endif() endif() endif() From cccb7cf1a52f38182f56d947bd609027726c778b Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 1 Oct 2020 14:32:02 +0200 Subject: [PATCH 262/544] [lldb] Add missing import for LLDB test decorators to TestStopHookScripted This test wasn't using decorators before and was missing the import, so my previous commit broke the test. --- .../test/API/commands/target/stop-hooks/TestStopHookScripted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py index a17f7131d20f1..014890e0d973b 100644 --- a/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py +++ b/lldb/test/API/commands/target/stop-hooks/TestStopHookScripted.py @@ -7,7 +7,7 @@ import lldb import lldbsuite.test.lldbutil as lldbutil from lldbsuite.test.lldbtest import * - +from lldbsuite.test.decorators import * class TestStopHooks(TestBase): From 7e02bc81c6dad90b0f98f74152f4b0991087d78d Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 1 Oct 2020 13:37:47 +0100 Subject: [PATCH 263/544] [NFC][ARM] LowOverheadLoop DEBUG statements --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 30 ++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index ac787a1674ab7..d642499833d72 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -603,8 +603,10 @@ bool LowOverheadLoop::ValidateTailPredicate() { return false; } - if (!VPTState::isValid(RDA)) + if (!VPTState::isValid(RDA)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n"); return false; + } if (!ValidateLiveOuts()) { LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n"); @@ -655,9 +657,13 @@ bool LowOverheadLoop::ValidateTailPredicate() { // instructions in the preheader. auto CannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - for (; I != E; ++I) - if (shouldInspect(*I)) + for (; I != E; ++I) { + if (shouldInspect(*I)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP" + << " insertion: " << *I); return true; + } + } return false; }; @@ -719,11 +725,17 @@ bool LowOverheadLoop::ValidateTailPredicate() { continue; if (isSubImmOpcode(MI->getOpcode())) { - if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) + if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" + " count: " << *MI); return false; + } FoundSub = true; - } else + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" + " count: " << *MI); return false; + } } ToRemove.insert(ElementChain.begin(), ElementChain.end()); } @@ -1082,8 +1094,14 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { Revert = true; return; } - TryAdjustInsertionPoint(StartInsertPt, Start, RDA); + LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end()) + dbgs() << "ARM Loops: Will insert LoopStart at end of block\n"; + else + dbgs() << "ARM Loops: Will insert LoopStart at " + << *StartInsertPt + ); + Revert = !ValidateRanges(Start, End, BBUtils, ML); CannotTailPredicate = !ValidateTailPredicate(); } From d53b4bee0ccd408cfe6e592540858046244e74ce Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Wed, 30 Sep 2020 11:16:22 +0100 Subject: [PATCH 264/544] [LoopFlatten] Add a loop-flattening pass This is a simple pass that flattens nested loops. The intention is to optimise loop nests like this, which together access an array linearly: for (int i = 0; i < N; ++i) for (int j = 0; j < M; ++j) f(A[i*M+j]); into one loop: for (int i = 0; i < (N*M); ++i) f(A[i]); It can also flatten loops where the induction variables are not used in the loop. This can help with codesize and runtime, especially on simple cpus without advanced branch prediction. This is only worth flattening if the induction variables are only used in an expression like i*M+j. If they had any other uses, we would have to insert a div/mod to reconstruct the original values, so this wouldn't be profitable. This partially fixes PR40581 as this pass triggers on one of the two cases. I will follow up on this to learn LoopFlatten a few more (small) tricks. Please note that LoopFlatten is not yet enabled by default. Patch by Oliver Stannard, with minor tweaks from Dave Green and myself. Differential Revision: https://reviews.llvm.org/D42365 --- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Transforms/Scalar.h | 6 + .../llvm/Transforms/Scalar/LoopFlatten.h | 33 + llvm/lib/Passes/PassBuilder.cpp | 7 + llvm/lib/Passes/PassRegistry.def | 1 + .../lib/Transforms/IPO/PassManagerBuilder.cpp | 10 + llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 + llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 605 ++++++++++++++++++ llvm/lib/Transforms/Scalar/Scalar.cpp | 5 + .../LoopFlatten/loop-flatten-negative.ll | 395 ++++++++++++ .../Transforms/LoopFlatten/loop-flatten.ll | 591 +++++++++++++++++ llvm/test/Transforms/LoopFlatten/pr40581.ll | 108 ++++ 13 files changed, 1764 insertions(+) create mode 100644 llvm/include/llvm/Transforms/Scalar/LoopFlatten.h create mode 100644 llvm/lib/Transforms/Scalar/LoopFlatten.cpp create mode 100644 llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll create mode 100644 llvm/test/Transforms/LoopFlatten/loop-flatten.ll create mode 100644 llvm/test/Transforms/LoopFlatten/pr40581.ll diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index dd4f9d714cb98..bbc506ceca190 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -242,6 +242,7 @@ void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&); void initializeLoopInterchangePass(PassRegistry&); +void initializeLoopFlattenLegacyPassPass(PassRegistry&); void initializeLoopLoadEliminationPass(PassRegistry&); void initializeLoopPassPass(PassRegistry&); void initializeLoopPredicationLegacyPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 157311a76b3d5..b9cd6158dbe44 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -127,6 +127,7 @@ namespace { (void) llvm::createLazyValueInfoPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopInterchangePass(); + (void) llvm::createLoopFlattenPass(); (void) llvm::createLoopPredicationPass(); (void) llvm::createLoopSimplifyPass(); (void) llvm::createLoopSimplifyCFGPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 9175a4d73162b..50946b54fb985 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -149,6 +149,12 @@ Pass *createLoopPredicationPass(); // Pass *createLoopInterchangePass(); +//===----------------------------------------------------------------------===// +// +// LoopFlatten - This pass flattens nested loops into a single loop. +// +Pass *createLoopFlattenPass(); + //===----------------------------------------------------------------------===// // // LoopStrengthReduce - This pass is strength reduces GEP instructions that use diff --git a/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h new file mode 100644 index 0000000000000..9d1c44c1732c9 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h @@ -0,0 +1,33 @@ +//===- LoopFlatten.h - Loop Flatten ---------------- -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Loop Flatten Pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H +#define LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class LoopFlattenPass : public PassInfoMixin { +public: + LoopFlattenPass() = default; + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 1ea73195740c1..af87f5e23a537 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -150,6 +150,7 @@ #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" +#include "llvm/Transforms/Scalar/LoopFlatten.h" #include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" @@ -250,6 +251,10 @@ static cl::opt EnableUnrollAndJam( "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)")); +static cl::opt EnableLoopFlatten( + "enable-npm-loop-flatten", cl::init(false), cl::Hidden, + cl::desc("Enable the Loop flattening pass for the new PM (default = off)")); + static cl::opt EnableSyntheticCounts( "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Run synthetic function entry count generation " @@ -510,6 +515,8 @@ FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline( C(LPM2, Level); LPM2.addPass(LoopDeletionPass()); + if (EnableLoopFlatten) + LPM2.addPass(LoopFlattenPass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. The normal unroller doesn't pay attention to forced full unroll diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 0111fc494c43c..be0ab2cc398ed 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -359,6 +359,7 @@ LOOP_PASS("loop-rotate", LoopRotatePass()) LOOP_PASS("no-op-loop", NoOpLoopPass()) LOOP_PASS("print", PrintLoopPass(dbgs())) LOOP_PASS("loop-deletion", LoopDeletionPass()) +LOOP_PASS("loop-flatten", LoopFlattenPass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 4aef39c031c5c..c63705a4ee947 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -92,6 +92,10 @@ static cl::opt EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass")); +static cl::opt EnableLoopFlatten("enable-loop-flatten", cl::init(false), + cl::Hidden, + cl::desc("Enable the LoopFlatten Pass")); + static cl::opt EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable preparation for ThinLTO.")); @@ -444,6 +448,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (EnableLoopInterchange) MPM.add(createLoopInterchangePass()); // Interchange loops + if (EnableLoopFlatten) { + MPM.add(createLoopFlattenPass()); // Flatten loops + MPM.add(createLoopSimplifyCFGPass()); + } // Unroll small loops MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, @@ -1035,6 +1043,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoopDeletionPass()); if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); + if (EnableLoopFlatten) + PM.add(createLoopFlattenPass()); // Unroll small loops PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index ae62aa0220724..030fb31bc6463 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -32,6 +32,7 @@ add_llvm_component_library(LLVMScalarOpts LoopIdiomRecognize.cpp LoopInstSimplify.cpp LoopInterchange.cpp + LoopFlatten.cpp LoopLoadElimination.cpp LoopPassManager.cpp LoopPredication.cpp diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp new file mode 100644 index 0000000000000..4d844093ff132 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -0,0 +1,605 @@ +//===- LoopFlatten.cpp - Loop flattening pass------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass flattens pairs nested loops into a single loop. +// +// The intention is to optimise loop nests like this, which together access an +// array linearly: +// for (int i = 0; i < N; ++i) +// for (int j = 0; j < M; ++j) +// f(A[i*M+j]); +// into one loop: +// for (int i = 0; i < (N*M); ++i) +// f(A[i]); +// +// It can also flatten loops where the induction variables are not used in the +// loop. This is only worth doing if the induction variables are only used in an +// expression like i*M+j. If they had any other uses, we would have to insert a +// div/mod to reconstruct the original values, so this wouldn't be profitable. +// +// We also need to prove that N*M will not overflow. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LoopFlatten.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "loop-flatten" + +using namespace llvm; +using namespace llvm::PatternMatch; + +static cl::opt RepeatedInstructionThreshold( + "loop-flatten-cost-threshold", cl::Hidden, cl::init(2), + cl::desc("Limit on the cost of instructions that can be repeated due to " + "loop flattening")); + +static cl::opt + AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, + cl::init(false), + cl::desc("Assume that the product of the two iteration " + "limits will never overflow")); + +// Finds the induction variable, increment and limit for a simple loop that we +// can flatten. +static bool findLoopComponents( + Loop *L, SmallPtrSetImpl &IterationInstructions, + PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment, + BranchInst *&BackBranch, ScalarEvolution *SE) { + LLVM_DEBUG(dbgs() << "Finding components of loop: " << L->getName() << "\n"); + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Loop is not in normal form\n"); + return false; + } + + // There must be exactly one exiting block, and it must be the same at the + // latch. + BasicBlock *Latch = L->getLoopLatch(); + if (L->getExitingBlock() != Latch) { + LLVM_DEBUG(dbgs() << "Exiting and latch block are different\n"); + return false; + } + // Latch block must end in a conditional branch. + BackBranch = dyn_cast(Latch->getTerminator()); + if (!BackBranch || !BackBranch->isConditional()) { + LLVM_DEBUG(dbgs() << "Could not find back-branch\n"); + return false; + } + IterationInstructions.insert(BackBranch); + LLVM_DEBUG(dbgs() << "Found back branch: "; BackBranch->dump()); + bool ContinueOnTrue = L->contains(BackBranch->getSuccessor(0)); + + // Find the induction PHI. If there is no induction PHI, we can't do the + // transformation. TODO: could other variables trigger this? Do we have to + // search for the best one? + InductionPHI = nullptr; + for (PHINode &PHI : L->getHeader()->phis()) { + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) { + InductionPHI = &PHI; + LLVM_DEBUG(dbgs() << "Found induction PHI: "; InductionPHI->dump()); + break; + } + } + if (!InductionPHI) { + LLVM_DEBUG(dbgs() << "Could not find induction PHI\n"); + return false; + } + + auto IsValidPredicate = [&](ICmpInst::Predicate Pred) { + if (ContinueOnTrue) + return Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT; + else + return Pred == CmpInst::ICMP_EQ; + }; + + // Find Compare and make sure it is valid + ICmpInst *Compare = dyn_cast(BackBranch->getCondition()); + if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) || + Compare->hasNUsesOrMore(2)) { + LLVM_DEBUG(dbgs() << "Could not find valid comparison\n"); + return false; + } + IterationInstructions.insert(Compare); + LLVM_DEBUG(dbgs() << "Found comparison: "; Compare->dump()); + + // Find increment and limit from the compare + Increment = nullptr; + if (match(Compare->getOperand(0), + m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { + Increment = dyn_cast(Compare->getOperand(0)); + Limit = Compare->getOperand(1); + } else if (Compare->getUnsignedPredicate() == CmpInst::ICMP_NE && + match(Compare->getOperand(1), + m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) { + Increment = dyn_cast(Compare->getOperand(1)); + Limit = Compare->getOperand(0); + } + if (!Increment || Increment->hasNUsesOrMore(3)) { + LLVM_DEBUG(dbgs() << "Cound not find valid increment\n"); + return false; + } + IterationInstructions.insert(Increment); + LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump()); + LLVM_DEBUG(dbgs() << "Found limit: "; Limit->dump()); + + assert(InductionPHI->getNumIncomingValues() == 2); + assert(InductionPHI->getIncomingValueForBlock(Latch) == Increment && + "PHI value is not increment inst"); + + auto *CI = dyn_cast( + InductionPHI->getIncomingValueForBlock(L->getLoopPreheader())); + if (!CI || !CI->isZero()) { + LLVM_DEBUG(dbgs() << "PHI value is not zero: "; CI->dump()); + return false; + } + + LLVM_DEBUG(dbgs() << "Successfully found all loop components\n"); + return true; +} + +static bool checkPHIs(Loop *OuterLoop, Loop *InnerLoop, + SmallPtrSetImpl &InnerPHIsToTransform, + PHINode *InnerInductionPHI, PHINode *OuterInductionPHI, + TargetTransformInfo *TTI) { + // All PHIs in the inner and outer headers must either be: + // - The induction PHI, which we are going to rewrite as one induction in + // the new loop. This is already checked by findLoopComponents. + // - An outer header PHI with all incoming values from outside the loop. + // LoopSimplify guarantees we have a pre-header, so we don't need to + // worry about that here. + // - Pairs of PHIs in the inner and outer headers, which implement a + // loop-carried dependency that will still be valid in the new loop. To + // be valid, this variable must be modified only in the inner loop. + + // The set of PHI nodes in the outer loop header that we know will still be + // valid after the transformation. These will not need to be modified (with + // the exception of the induction variable), but we do need to check that + // there are no unsafe PHI nodes. + SmallPtrSet SafeOuterPHIs; + SafeOuterPHIs.insert(OuterInductionPHI); + + // Check that all PHI nodes in the inner loop header match one of the valid + // patterns. + for (PHINode &InnerPHI : InnerLoop->getHeader()->phis()) { + // The induction PHIs break these rules, and that's OK because we treat + // them specially when doing the transformation. + if (&InnerPHI == InnerInductionPHI) + continue; + + // Each inner loop PHI node must have two incoming values/blocks - one + // from the pre-header, and one from the latch. + assert(InnerPHI.getNumIncomingValues() == 2); + Value *PreHeaderValue = + InnerPHI.getIncomingValueForBlock(InnerLoop->getLoopPreheader()); + Value *LatchValue = + InnerPHI.getIncomingValueForBlock(InnerLoop->getLoopLatch()); + + // The incoming value from the outer loop must be the PHI node in the + // outer loop header, with no modifications made in the top of the outer + // loop. + PHINode *OuterPHI = dyn_cast(PreHeaderValue); + if (!OuterPHI || OuterPHI->getParent() != OuterLoop->getHeader()) { + LLVM_DEBUG(dbgs() << "value modified in top of outer loop\n"); + return false; + } + + // The other incoming value must come from the inner loop, without any + // modifications in the tail end of the outer loop. We are in LCSSA form, + // so this will actually be a PHI in the inner loop's exit block, which + // only uses values from inside the inner loop. + PHINode *LCSSAPHI = dyn_cast( + OuterPHI->getIncomingValueForBlock(OuterLoop->getLoopLatch())); + if (!LCSSAPHI) { + LLVM_DEBUG(dbgs() << "could not find LCSSA PHI\n"); + return false; + } + + // The value used by the LCSSA PHI must be the same one that the inner + // loop's PHI uses. + if (LCSSAPHI->hasConstantValue() != LatchValue) { + LLVM_DEBUG( + dbgs() << "LCSSA PHI incoming value does not match latch value\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "PHI pair is safe:\n"); + LLVM_DEBUG(dbgs() << " Inner: "; InnerPHI.dump()); + LLVM_DEBUG(dbgs() << " Outer: "; OuterPHI->dump()); + SafeOuterPHIs.insert(OuterPHI); + InnerPHIsToTransform.insert(&InnerPHI); + } + + for (PHINode &OuterPHI : OuterLoop->getHeader()->phis()) { + if (!SafeOuterPHIs.count(&OuterPHI)) { + LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); + return false; + } + } + + return true; +} + +static bool +checkOuterLoopInsts(Loop *OuterLoop, Loop *InnerLoop, + SmallPtrSetImpl &IterationInstructions, + Value *InnerLimit, PHINode *OuterPHI, + TargetTransformInfo *TTI) { + // Check for instructions in the outer but not inner loop. If any of these + // have side-effects then this transformation is not legal, and if there is + // a significant amount of code here which can't be optimised out that it's + // not profitable (as these instructions would get executed for each + // iteration of the inner loop). + unsigned RepeatedInstrCost = 0; + for (auto *B : OuterLoop->getBlocks()) { + if (InnerLoop->contains(B)) + continue; + + for (auto &I : *B) { + if (!isa(&I) && !I.isTerminator() && + !isSafeToSpeculativelyExecute(&I)) { + LLVM_DEBUG(dbgs() << "Cannot flatten because instruction may have " + "side effects: "; + I.dump()); + return false; + } + // The execution count of the outer loop's iteration instructions + // (increment, compare and branch) will be increased, but the + // equivalent instructions will be removed from the inner loop, so + // they make a net difference of zero. + if (IterationInstructions.count(&I)) + continue; + // The uncoditional branch to the inner loop's header will turn into + // a fall-through, so adds no cost. + BranchInst *Br = dyn_cast(&I); + if (Br && Br->isUnconditional() && + Br->getSuccessor(0) == InnerLoop->getHeader()) + continue; + // Multiplies of the outer iteration variable and inner iteration + // count will be optimised out. + if (match(&I, m_c_Mul(m_Specific(OuterPHI), m_Specific(InnerLimit)))) + continue; + int Cost = TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); + RepeatedInstrCost += Cost; + } + } + + LLVM_DEBUG(dbgs() << "Cost of instructions that will be repeated: " + << RepeatedInstrCost << "\n"); + // Bail out if flattening the loops would cause instructions in the outer + // loop but not in the inner loop to be executed extra times. + if (RepeatedInstrCost > RepeatedInstructionThreshold) + return false; + + return true; +} + +static bool checkIVUsers(PHINode *InnerPHI, PHINode *OuterPHI, + BinaryOperator *InnerIncrement, + BinaryOperator *OuterIncrement, Value *InnerLimit, + SmallPtrSetImpl &LinearIVUses) { + // We require all uses of both induction variables to match this pattern: + // + // (OuterPHI * InnerLimit) + InnerPHI + // + // Any uses of the induction variables not matching that pattern would + // require a div/mod to reconstruct in the flattened loop, so the + // transformation wouldn't be profitable. + + // Check that all uses of the inner loop's induction variable match the + // expected pattern, recording the uses of the outer IV. + SmallPtrSet ValidOuterPHIUses; + for (User *U : InnerPHI->users()) { + if (U == InnerIncrement) + continue; + + LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump()); + + Value *MatchedMul, *MatchedItCount; + if (match(U, m_c_Add(m_Specific(InnerPHI), m_Value(MatchedMul))) && + match(MatchedMul, + m_c_Mul(m_Specific(OuterPHI), m_Value(MatchedItCount))) && + MatchedItCount == InnerLimit) { + LLVM_DEBUG(dbgs() << "Use is optimisable\n"); + ValidOuterPHIUses.insert(MatchedMul); + LinearIVUses.insert(U); + } else { + LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n"); + return false; + } + } + + // Check that there are no uses of the outer IV other than the ones found + // as part of the pattern above. + for (User *U : OuterPHI->users()) { + if (U == OuterIncrement) + continue; + + LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump()); + + if (!ValidOuterPHIUses.count(U)) { + LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n"); + return false; + } else { + LLVM_DEBUG(dbgs() << "Use is optimisable\n"); + } + } + + LLVM_DEBUG(dbgs() << "Found " << LinearIVUses.size() + << " value(s) that can be replaced:\n"; + for (Value *V : LinearIVUses) { + dbgs() << " "; + V->dump(); + }); + + return true; +} + +// Return an OverflowResult dependant on if overflow of the multiplication of +// InnerLimit and OuterLimit can be assumed not to happen. +static OverflowResult checkOverflow(Loop *OuterLoop, Value *InnerLimit, + Value *OuterLimit, + SmallPtrSetImpl &LinearIVUses, + DominatorTree *DT, AssumptionCache *AC) { + Function *F = OuterLoop->getHeader()->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + + // For debugging/testing. + if (AssumeNoOverflow) + return OverflowResult::NeverOverflows; + + // Check if the multiply could not overflow due to known ranges of the + // input values. + OverflowResult OR = computeOverflowForUnsignedMul( + InnerLimit, OuterLimit, DL, AC, + OuterLoop->getLoopPreheader()->getTerminator(), DT); + if (OR != OverflowResult::MayOverflow) + return OR; + + for (Value *V : LinearIVUses) { + for (Value *U : V->users()) { + if (auto *GEP = dyn_cast(U)) { + // The IV is used as the operand of a GEP, and the IV is at least as + // wide as the address space of the GEP. In this case, the GEP would + // wrap around the address space before the IV increment wraps, which + // would be UB. + if (GEP->isInBounds() && + V->getType()->getIntegerBitWidth() >= + DL.getPointerTypeSizeInBits(GEP->getType())) { + LLVM_DEBUG( + dbgs() << "use of linear IV would be UB if overflow occurred: "; + GEP->dump()); + return OverflowResult::NeverOverflows; + } + } + } + } + + return OverflowResult::MayOverflow; +} + +static bool FlattenLoopPair(Loop *OuterLoop, Loop *InnerLoop, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, TargetTransformInfo *TTI, + std::function markLoopAsDeleted) { + Function *F = OuterLoop->getHeader()->getParent(); + + LLVM_DEBUG(dbgs() << "Loop flattening running on outer loop " + << OuterLoop->getHeader()->getName() << " and inner loop " + << InnerLoop->getHeader()->getName() << " in " + << F->getName() << "\n"); + + SmallPtrSet IterationInstructions; + + PHINode *InnerInductionPHI, *OuterInductionPHI; + Value *InnerLimit, *OuterLimit; + BinaryOperator *InnerIncrement, *OuterIncrement; + BranchInst *InnerBranch, *OuterBranch; + + if (!findLoopComponents(InnerLoop, IterationInstructions, InnerInductionPHI, + InnerLimit, InnerIncrement, InnerBranch, SE)) + return false; + if (!findLoopComponents(OuterLoop, IterationInstructions, OuterInductionPHI, + OuterLimit, OuterIncrement, OuterBranch, SE)) + return false; + + // Both of the loop limit values must be invariant in the outer loop + // (non-instructions are all inherently invariant). + if (!OuterLoop->isLoopInvariant(InnerLimit)) { + LLVM_DEBUG(dbgs() << "inner loop limit not invariant\n"); + return false; + } + if (!OuterLoop->isLoopInvariant(OuterLimit)) { + LLVM_DEBUG(dbgs() << "outer loop limit not invariant\n"); + return false; + } + + SmallPtrSet InnerPHIsToTransform; + if (!checkPHIs(OuterLoop, InnerLoop, InnerPHIsToTransform, InnerInductionPHI, + OuterInductionPHI, TTI)) + return false; + + // FIXME: it should be possible to handle different types correctly. + if (InnerInductionPHI->getType() != OuterInductionPHI->getType()) + return false; + + if (!checkOuterLoopInsts(OuterLoop, InnerLoop, IterationInstructions, + InnerLimit, OuterInductionPHI, TTI)) + return false; + + // Find the values in the loop that can be replaced with the linearized + // induction variable, and check that there are no other uses of the inner + // or outer induction variable. If there were, we could still do this + // transformation, but we'd have to insert a div/mod to calculate the + // original IVs, so it wouldn't be profitable. + SmallPtrSet LinearIVUses; + if (!checkIVUsers(InnerInductionPHI, OuterInductionPHI, InnerIncrement, + OuterIncrement, InnerLimit, LinearIVUses)) + return false; + + // Check if the new iteration variable might overflow. In this case, we + // need to version the loop, and select the original version at runtime if + // the iteration space is too large. + // TODO: We currently don't version the loop. + // TODO: it might be worth using a wider iteration variable rather than + // versioning the loop, if a wide enough type is legal. + bool MustVersionLoop = true; + OverflowResult OR = + checkOverflow(OuterLoop, InnerLimit, OuterLimit, LinearIVUses, DT, AC); + if (OR == OverflowResult::AlwaysOverflowsHigh || + OR == OverflowResult::AlwaysOverflowsLow) { + LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n"); + return false; + } else if (OR == OverflowResult::MayOverflow) { + LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n"); + } else { + LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n"); + MustVersionLoop = false; + } + + // We cannot safely flatten the loop. Exit now. + if (MustVersionLoop) + return false; + + // Do the actual transformation. + LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n"); + + { + using namespace ore; + OptimizationRemark Remark(DEBUG_TYPE, "Flattened", InnerLoop->getStartLoc(), + InnerLoop->getHeader()); + OptimizationRemarkEmitter ORE(F); + Remark << "Flattened into outer loop"; + ORE.emit(Remark); + } + + Value *NewTripCount = + BinaryOperator::CreateMul(InnerLimit, OuterLimit, "flatten.tripcount", + OuterLoop->getLoopPreheader()->getTerminator()); + LLVM_DEBUG(dbgs() << "Created new trip count in preheader: "; + NewTripCount->dump()); + + // Fix up PHI nodes that take values from the inner loop back-edge, which + // we are about to remove. + InnerInductionPHI->removeIncomingValue(InnerLoop->getLoopLatch()); + for (PHINode *PHI : InnerPHIsToTransform) + PHI->removeIncomingValue(InnerLoop->getLoopLatch()); + + // Modify the trip count of the outer loop to be the product of the two + // trip counts. + cast(OuterBranch->getCondition())->setOperand(1, NewTripCount); + + // Replace the inner loop backedge with an unconditional branch to the exit. + BasicBlock *InnerExitBlock = InnerLoop->getExitBlock(); + BasicBlock *InnerExitingBlock = InnerLoop->getExitingBlock(); + InnerExitingBlock->getTerminator()->eraseFromParent(); + BranchInst::Create(InnerExitBlock, InnerExitingBlock); + DT->deleteEdge(InnerExitingBlock, InnerLoop->getHeader()); + + // Replace all uses of the polynomial calculated from the two induction + // variables with the one new one. + for (Value *V : LinearIVUses) + V->replaceAllUsesWith(OuterInductionPHI); + + // Tell LoopInfo, SCEV and the pass manager that the inner loop has been + // deleted, and any information that have about the outer loop invalidated. + markLoopAsDeleted(InnerLoop); + SE->forgetLoop(OuterLoop); + SE->forgetLoop(InnerLoop); + LI->erase(InnerLoop); + + return true; +} + +PreservedAnalyses LoopFlattenPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &Updater) { + if (L.getSubLoops().size() != 1) + return PreservedAnalyses::all(); + + Loop *InnerLoop = *L.begin(); + std::string LoopName(InnerLoop->getName()); + if (!FlattenLoopPair( + &L, InnerLoop, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, + [&](Loop *L) { Updater.markLoopAsDeleted(*L, LoopName); })) + return PreservedAnalyses::all(); + return getLoopPassPreservedAnalyses(); +} + +namespace { +class LoopFlattenLegacyPass : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopFlattenLegacyPass() : LoopPass(ID) { + initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + // Possibly flatten loop L into its child. + bool runOnLoop(Loop *L, LPPassManager &) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + getLoopAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + } +}; +} // namespace + +char LoopFlattenLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", + false, false) + +Pass *llvm::createLoopFlattenPass() { return new LoopFlattenLegacyPass(); } + +bool LoopFlattenLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + if (L->getSubLoops().size() != 1) + return false; + + ScalarEvolution *SE = &getAnalysis().getSE(); + LoopInfo *LI = &getAnalysis().getLoopInfo(); + auto *DTWP = getAnalysisIfAvailable(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto &TTIP = getAnalysis(); + TargetTransformInfo *TTI = &TTIP.getTTI(*L->getHeader()->getParent()); + AssumptionCache *AC = + &getAnalysis().getAssumptionCache( + *L->getHeader()->getParent()); + + Loop *InnerLoop = *L->begin(); + return FlattenLoopPair(L, InnerLoop, DT, LI, SE, AC, TTI, + [&](Loop *L) { LPM.markLoopAsDeleted(*L); }); +} diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index ed4c04b155dc8..c373b729dcbe8 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -67,6 +67,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopAccessLegacyAnalysisPass(Registry); initializeLoopInstSimplifyLegacyPassPass(Registry); initializeLoopInterchangePass(Registry); + initializeLoopFlattenLegacyPassPass(Registry); initializeLoopPredicationLegacyPassPass(Registry); initializeLoopRotateLegacyPassPass(Registry); initializeLoopStrengthReducePass(Registry); @@ -186,6 +187,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } +void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopFlattenPass()); +} + void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopIdiomPass()); } diff --git a/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll b/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll new file mode 100644 index 0000000000000..aad23318f6e9e --- /dev/null +++ b/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll @@ -0,0 +1,395 @@ +; RUN: opt < %s -S -loop-flatten -debug-only=loop-flatten 2>&1 | FileCheck %s +; REQUIRES: asserts + +; Every function in this file has a reason that it can't be transformed. + +; CHECK-NOT: Checks all passed, doing the transformation + +; Outer loop does not start at zero +define void @test_1(i32 %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %cmp25 = icmp sgt i32 %N, 0 + br i1 %cmp25, label %for.body4.lr.ph, label %for.cond.cleanup + +for.body4.lr.ph: + %i.026 = phi i32 [ %inc10, %for.cond.cleanup3 ], [ 1, %entry ] + %mul = mul nsw i32 %i.026, %N + br label %for.body4 + +for.body4: + %j.024 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ] + %add = add nsw i32 %j.024, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + %0 = load i32, i32* %arrayidx, align 4 + %mul5 = mul nsw i32 %0, %scale + %arrayidx8 = getelementptr inbounds i32, i32* %C, i32 %add + store i32 %mul5, i32* %arrayidx8, align 4 + %inc = add nuw nsw i32 %j.024, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup3, label %for.body4 + +for.cond.cleanup3: + %inc10 = add nuw nsw i32 %i.026, 1 + %exitcond27 = icmp eq i32 %inc10, %N + br i1 %exitcond27, label %for.cond.cleanup, label %for.body4.lr.ph + +for.cond.cleanup: + ret void +} + +; Inner loop does not start at zero +define void @test_2(i32 %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %cmp25 = icmp sgt i32 %N, 0 + br i1 %cmp25, label %for.body4.lr.ph, label %for.cond.cleanup + +for.body4.lr.ph: + %i.026 = phi i32 [ %inc10, %for.cond.cleanup3 ], [ 0, %entry ] + %mul = mul nsw i32 %i.026, %N + br label %for.body4 + +for.body4: + %j.024 = phi i32 [ 1, %for.body4.lr.ph ], [ %inc, %for.body4 ] + %add = add nsw i32 %j.024, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + %0 = load i32, i32* %arrayidx, align 4 + %mul5 = mul nsw i32 %0, %scale + %arrayidx8 = getelementptr inbounds i32, i32* %C, i32 %add + store i32 %mul5, i32* %arrayidx8, align 4 + %inc = add nuw nsw i32 %j.024, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup3, label %for.body4 + +for.cond.cleanup3: + %inc10 = add nuw nsw i32 %i.026, 1 + %exitcond27 = icmp eq i32 %inc10, %N + br i1 %exitcond27, label %for.cond.cleanup, label %for.body4.lr.ph + +for.cond.cleanup: + ret void +} + +; Outer IV used directly +define hidden void @test_3(i16 zeroext %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %conv = zext i16 %N to i32 + %cmp25 = icmp eq i16 %N, 0 + br i1 %cmp25, label %for.cond.cleanup, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %entry + br label %for.body.us + +for.body.us: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us, %for.body.lr.ph.split.us + %i.026.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc12.us, %for.cond2.for.cond.cleanup6_crit_edge.us ] + %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %i.026.us + %mul9.us = mul nuw nsw i32 %i.026.us, %conv + br label %for.body7.us + +for.body7.us: ; preds = %for.body.us, %for.body7.us + %j.024.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body7.us ] + %0 = load i32, i32* %arrayidx.us, align 4 + %mul.us = mul nsw i32 %0, %scale + %add.us = add nuw nsw i32 %j.024.us, %mul9.us + %arrayidx10.us = getelementptr inbounds i32, i32* %C, i32 %add.us + store i32 %mul.us, i32* %arrayidx10.us, align 4 + %inc.us = add nuw nsw i32 %j.024.us, 1 + %exitcond = icmp ne i32 %inc.us, %conv + br i1 %exitcond, label %for.body7.us, label %for.cond2.for.cond.cleanup6_crit_edge.us + +for.cond2.for.cond.cleanup6_crit_edge.us: ; preds = %for.body7.us + %inc12.us = add nuw nsw i32 %i.026.us, 1 + %exitcond27 = icmp ne i32 %inc12.us, %conv + br i1 %exitcond27, label %for.body.us, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void +} + +; Inner IV used directly +define hidden void @test_4(i16 zeroext %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %conv = zext i16 %N to i32 + %cmp25 = icmp eq i16 %N, 0 + br i1 %cmp25, label %for.cond.cleanup, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %entry + br label %for.body.us + +for.body.us: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us, %for.body.lr.ph.split.us + %i.026.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc12.us, %for.cond2.for.cond.cleanup6_crit_edge.us ] + %mul9.us = mul nuw nsw i32 %i.026.us, %conv + br label %for.body7.us + +for.body7.us: ; preds = %for.body.us, %for.body7.us + %j.024.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body7.us ] + %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %j.024.us + %0 = load i32, i32* %arrayidx.us, align 4 + %mul.us = mul nsw i32 %0, %scale + %add.us = add nuw nsw i32 %j.024.us, %mul9.us + %arrayidx10.us = getelementptr inbounds i32, i32* %C, i32 %add.us + store i32 %mul.us, i32* %arrayidx10.us, align 4 + %inc.us = add nuw nsw i32 %j.024.us, 1 + %exitcond = icmp ne i32 %inc.us, %conv + br i1 %exitcond, label %for.body7.us, label %for.cond2.for.cond.cleanup6_crit_edge.us + +for.cond2.for.cond.cleanup6_crit_edge.us: ; preds = %for.body7.us + %inc12.us = add nuw nsw i32 %i.026.us, 1 + %exitcond27 = icmp ne i32 %inc12.us, %conv + br i1 %exitcond27, label %for.body.us, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void +} + +; Inner iteration count not invariant in outer loop +declare i32 @get_int() readonly +define void @test_5(i16 zeroext %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %conv = zext i16 %N to i32 + %cmp27 = icmp eq i16 %N, 0 + br i1 %cmp27, label %for.cond.cleanup, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.cond.cleanup5 + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.lr.ph, %for.cond.cleanup5 + %i.028 = phi i32 [ 0, %for.body.lr.ph ], [ %inc12, %for.cond.cleanup5 ] + %call = tail call i32 @get_int() + %cmp325 = icmp sgt i32 %call, 0 + br i1 %cmp325, label %for.body6.lr.ph, label %for.cond.cleanup5 + +for.body6.lr.ph: ; preds = %for.body + %mul = mul nsw i32 %call, %i.028 + br label %for.body6 + +for.cond.cleanup5.loopexit: ; preds = %for.body6 + br label %for.cond.cleanup5 + +for.cond.cleanup5: ; preds = %for.cond.cleanup5.loopexit, %for.body + %inc12 = add nuw nsw i32 %i.028, 1 + %exitcond29 = icmp ne i32 %inc12, %conv + br i1 %exitcond29, label %for.body, label %for.cond.cleanup.loopexit + +for.body6: ; preds = %for.body6.lr.ph, %for.body6 + %j.026 = phi i32 [ 0, %for.body6.lr.ph ], [ %inc, %for.body6 ] + %add = add nsw i32 %j.026, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + %0 = load i32, i32* %arrayidx, align 4 + %mul7 = mul nsw i32 %0, %scale + %arrayidx10 = getelementptr inbounds i32, i32* %C, i32 %add + store i32 %mul7, i32* %arrayidx10, align 4 + %inc = add nuw nsw i32 %j.026, 1 + %exitcond = icmp ne i32 %inc, %call + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5.loopexit +} + +; Inner loop has an early exit +define hidden void @test_6(i16 zeroext %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %conv = zext i16 %N to i32 + %cmp39 = icmp eq i16 %N, 0 + br i1 %cmp39, label %for.cond.cleanup, label %for.body.us.preheader + +for.body.us.preheader: ; preds = %entry + br label %for.body.us + +for.body.us: ; preds = %for.body.us.preheader, %cleanup.us + %i.040.us = phi i32 [ %inc19.us, %cleanup.us ], [ 0, %for.body.us.preheader ] + %mul.us = mul nuw nsw i32 %i.040.us, %conv + br label %for.body7.us + +for.body7.us: ; preds = %for.body.us, %if.end.us + %j.038.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %if.end.us ] + %add.us = add nuw nsw i32 %j.038.us, %mul.us + %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us + %0 = load i32, i32* %arrayidx.us, align 4 + %tobool.us = icmp eq i32 %0, 0 + br i1 %tobool.us, label %if.end.us, label %cleanup.us + +cleanup.us: ; preds = %if.end.us, %for.body7.us + %inc19.us = add nuw nsw i32 %i.040.us, 1 + %exitcond = icmp eq i32 %inc19.us, %conv + br i1 %exitcond, label %for.cond.cleanup, label %for.body.us + +if.end.us: ; preds = %for.body7.us + %arrayidx17.us = getelementptr inbounds i32, i32* %C, i32 %add.us + store i32 0, i32* %arrayidx17.us, align 4 + %inc.us = add nuw nsw i32 %j.038.us, 1 + %cmp4.us = icmp ult i32 %inc.us, %conv + br i1 %cmp4.us, label %for.body7.us, label %cleanup.us + +for.cond.cleanup: ; preds = %cleanup.us, %entry + ret void +} + +define hidden void @test_7(i16 zeroext %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %conv = zext i16 %N to i32 + %cmp30 = icmp eq i16 %N, 0 + br i1 %cmp30, label %cleanup, label %for.body.us.preheader + +for.body.us.preheader: ; preds = %entry + br label %for.body.us + +for.body.us: ; preds = %for.body.us.preheader, %for.cond2.for.cond.cleanup6_crit_edge.us + %i.031.us = phi i32 [ %inc15.us, %for.cond2.for.cond.cleanup6_crit_edge.us ], [ 0, %for.body.us.preheader ] + %call.us = tail call i32 @get_int() #2 + %tobool.us = icmp eq i32 %call.us, 0 + br i1 %tobool.us, label %for.body7.lr.ph.us, label %cleanup + +for.body7.us: ; preds = %for.body7.us, %for.body7.lr.ph.us + %j.029.us = phi i32 [ 0, %for.body7.lr.ph.us ], [ %inc.us, %for.body7.us ] + %add.us = add nuw nsw i32 %j.029.us, %mul.us + %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us + %0 = load i32, i32* %arrayidx.us, align 4 + %mul9.us = mul nsw i32 %0, %scale + %arrayidx13.us = getelementptr inbounds i32, i32* %C, i32 %add.us + store i32 %mul9.us, i32* %arrayidx13.us, align 4 + %inc.us = add nuw nsw i32 %j.029.us, 1 + %exitcond = icmp eq i32 %inc.us, %conv + br i1 %exitcond, label %for.cond2.for.cond.cleanup6_crit_edge.us, label %for.body7.us + +for.body7.lr.ph.us: ; preds = %for.body.us + %mul.us = mul nuw nsw i32 %i.031.us, %conv + br label %for.body7.us + +for.cond2.for.cond.cleanup6_crit_edge.us: ; preds = %for.body7.us + %inc15.us = add nuw nsw i32 %i.031.us, 1 + %cmp.us = icmp ult i32 %inc15.us, %conv + br i1 %cmp.us, label %for.body.us, label %cleanup + +cleanup: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us, %for.body.us, %entry + ret void +} + +; Step is not 1 +define i32 @test_8(i32 %val, i16* nocapture %A) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc6 + %i.018 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %mul = mul nuw nsw i32 %i.018, 20 + br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] + %add = add nuw nsw i32 %j.017, %mul + %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add + %0 = load i16, i16* %arrayidx, align 2 + %conv16 = zext i16 %0 to i32 + %add4 = add i32 %conv16, %val + %conv5 = trunc i32 %add4 to i16 + store i16 %conv5, i16* %arrayidx, align 2 + %inc = add nuw nsw i32 %j.017, 1 + %exitcond = icmp ne i32 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc6 + +for.inc6: ; preds = %for.body3 + %inc7 = add nuw nsw i32 %i.018, 2 + %exitcond19 = icmp ne i32 %inc7, 10 + br i1 %exitcond19, label %for.body, label %for.end8 + +for.end8: ; preds = %for.inc6 + ret i32 10 +} + + +; Step is not 1 +define i32 @test_9(i32 %val, i16* nocapture %A) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc6 + %i.018 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %mul = mul nuw nsw i32 %i.018, 20 + br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] + %add = add nuw nsw i32 %j.017, %mul + %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add + %0 = load i16, i16* %arrayidx, align 2 + %conv16 = zext i16 %0 to i32 + %add4 = add i32 %conv16, %val + %conv5 = trunc i32 %add4 to i16 + store i16 %conv5, i16* %arrayidx, align 2 + %inc = add nuw nsw i32 %j.017, 2 + %exitcond = icmp ne i32 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc6 + +for.inc6: ; preds = %for.body3 + %inc7 = add nuw nsw i32 %i.018, 1 + %exitcond19 = icmp ne i32 %inc7, 10 + br i1 %exitcond19, label %for.body, label %for.end8 + +for.end8: ; preds = %for.inc6 + ret i32 10 +} + + +; Outer loop conditional phi +define i32 @e() { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.end16 + %f.033 = phi i32 [ 0, %entry ], [ %inc18, %for.end16 ] + %g.032 = phi i32 [ undef, %entry ], [ %g.3.lcssa, %for.end16 ] + %.pr = add i32 10, 10 + %tobool29 = icmp eq i32 %.pr, 0 + br i1 %tobool29, label %for.end, label %for.body2.lr.ph + +for.body2.lr.ph: ; preds = %for.body + br label %for.cond1.for.end_crit_edge + +for.cond1.for.end_crit_edge: ; preds = %for.body2.lr.ph + br label %for.end + +for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body + %g.1.lcssa = phi i32 [ 0, %for.cond1.for.end_crit_edge ], [ %g.032, %for.body ] + br label %for.body5 + +for.body5: ; preds = %for.end, %lor.end + %i.031 = phi i32 [ 0, %for.end ], [ %inc15, %lor.end ] + %g.230 = phi i32 [ %g.1.lcssa, %for.end ], [ %g.3, %lor.end ] + %0 = add i32 10, 10 + %1 = add i32 10, 10 + %tobool9 = icmp eq i32 %1, 0 + br i1 %tobool9, label %lor.rhs, label %lor.end + +lor.rhs: ; preds = %for.body5 + %2 = add i32 10, 10 + %call11 = add i32 10, 10 + %tobool12 = icmp ne i32 %call11, 0 + br label %lor.end + +lor.end: ; preds = %for.body5, %lor.rhs + %g.3 = phi i32 [ %g.230, %for.body5 ], [ %call11, %lor.rhs ] + %3 = phi i1 [ true, %for.body5 ], [ %tobool12, %lor.rhs ] + %lor.ext = zext i1 %3 to i32 + %inc15 = add nuw nsw i32 %i.031, 1 + %exitcond = icmp ne i32 %inc15, 9 + br i1 %exitcond, label %for.body5, label %for.end16 + +for.end16: ; preds = %lor.end + %g.3.lcssa = phi i32 [ %g.3, %lor.end ] + %inc18 = add nuw nsw i32 %f.033, 1 + %exitcond34 = icmp ne i32 %inc18, 7 + br i1 %exitcond34, label %for.body, label %for.end19 + +for.end19: ; preds = %for.end16 + ret i32 undef +} diff --git a/llvm/test/Transforms/LoopFlatten/loop-flatten.ll b/llvm/test/Transforms/LoopFlatten/loop-flatten.ll new file mode 100644 index 0000000000000..2d7f897472cea --- /dev/null +++ b/llvm/test/Transforms/LoopFlatten/loop-flatten.ll @@ -0,0 +1,591 @@ +; RUN: opt < %s -S -loop-flatten -verify-loop-info -verify-dom-info -verify-scev -verify | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; CHECK-LABEL: test1 +; Simple loop where the IV's is constant +define i32 @test1(i32 %val, i16* nocapture %A) { +entry: + br label %for.body +; CHECK: entry: +; CHECK: %flatten.tripcount = mul i32 20, 10 +; CHECK: br label %for.body + +for.body: ; preds = %entry, %for.inc6 + %i.018 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %mul = mul nuw nsw i32 %i.018, 20 + br label %for.body3 +; CHECK: for.body: +; CHECK: %i.018 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] +; CHECK: %mul = mul nuw nsw i32 %i.018, 20 +; CHECK: br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] + %add = add nuw nsw i32 %j.017, %mul + %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add + %0 = load i16, i16* %arrayidx, align 2 + %conv16 = zext i16 %0 to i32 + %add4 = add i32 %conv16, %val + %conv5 = trunc i32 %add4 to i16 + store i16 %conv5, i16* %arrayidx, align 2 + %inc = add nuw nsw i32 %j.017, 1 + %exitcond = icmp ne i32 %inc, 20 + br i1 %exitcond, label %for.body3, label %for.inc6 +; CHECK: for.body3: +; CHECK: %j.017 = phi i32 [ 0, %for.body ] +; CHECK: %add = add nuw nsw i32 %j.017, %mul +; CHECK: %arrayidx = getelementptr inbounds i16, i16* %A, i32 %i.018 +; CHECK: %0 = load i16, i16* %arrayidx, align 2 +; CHECK: %conv16 = zext i16 %0 to i32 +; CHECK: %add4 = add i32 %conv16, %val +; CHECK: %conv5 = trunc i32 %add4 to i16 +; CHECK: store i16 %conv5, i16* %arrayidx, align 2 +; CHECK: %inc = add nuw nsw i32 %j.017, 1 +; CHECK: %exitcond = icmp ne i32 %inc, 20 +; CHECK: br label %for.inc6 + +for.inc6: ; preds = %for.body3 + %inc7 = add nuw nsw i32 %i.018, 1 + %exitcond19 = icmp ne i32 %inc7, 10 + br i1 %exitcond19, label %for.body, label %for.end8 +; CHECK: for.inc6: +; CHECK: %inc7 = add nuw nsw i32 %i.018, 1 +; CHECK: %exitcond19 = icmp ne i32 %inc7, %flatten.tripcount +; CHECK: br i1 %exitcond19, label %for.body, label %for.end8 + +for.end8: ; preds = %for.inc6 + ret i32 10 +} + + +; CHECK-LABEL: test2 +; Same as above but non constant IV (which still cannot overflow) +define i32 @test2(i8 zeroext %I, i32 %val, i16* nocapture %A) { +entry: + %conv = zext i8 %I to i32 + %cmp26 = icmp eq i8 %I, 0 + br i1 %cmp26, label %for.end13, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %entry + br label %for.body.us +; CHECK: for.body.lr.ph.split.us: +; CHECK: %flatten.tripcount = mul i32 %conv, %conv +; CHECK: br label %for.body.us + +for.body.us: ; preds = %for.cond2.for.inc11_crit_edge.us, %for.body.lr.ph.split.us + %i.027.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc12.us, %for.cond2.for.inc11_crit_edge.us ] + %mul.us = mul nuw nsw i32 %i.027.us, %conv + br label %for.body6.us +; CHECK: for.body.us: +; CHECK: %i.027.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc12.us, %for.cond2.for.inc11_crit_edge.us ] +; CHECK: %mul.us = mul nuw nsw i32 %i.027.us, %conv +; CHECK: br label %for.body6.us + +for.body6.us: ; preds = %for.body.us, %for.body6.us + %j.025.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body6.us ] + %add.us = add nuw nsw i32 %j.025.us, %mul.us + %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %add.us + %0 = load i16, i16* %arrayidx.us, align 2 + %conv823.us = zext i16 %0 to i32 + %add9.us = add i32 %conv823.us, %val + %conv10.us = trunc i32 %add9.us to i16 + store i16 %conv10.us, i16* %arrayidx.us, align 2 + %inc.us = add nuw nsw i32 %j.025.us, 1 + %exitcond = icmp ne i32 %inc.us, %conv + br i1 %exitcond, label %for.body6.us, label %for.cond2.for.inc11_crit_edge.us +; CHECK: for.body6.us: +; CHECK: %j.025.us = phi i32 [ 0, %for.body.us ] +; CHECK: %add.us = add nuw nsw i32 %j.025.us, %mul.us +; CHECK: %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %i.027.us +; CHECK: %0 = load i16, i16* %arrayidx.us, align 2 +; CHECK: %conv823.us = zext i16 %0 to i32 +; CHECK: %add9.us = add i32 %conv823.us, %val +; CHECK: %conv10.us = trunc i32 %add9.us to i16 +; CHECK: store i16 %conv10.us, i16* %arrayidx.us, align 2 +; CHECK: %inc.us = add nuw nsw i32 %j.025.us, 1 +; CHECK: %exitcond = icmp ne i32 %inc.us, %conv +; CHECK: br label %for.cond2.for.inc11_crit_edge.us + +for.cond2.for.inc11_crit_edge.us: ; preds = %for.body6.us + %inc12.us = add nuw nsw i32 %i.027.us, 1 + %exitcond28 = icmp ne i32 %inc12.us, %conv + br i1 %exitcond28, label %for.body.us, label %for.end13.loopexit +; CHECK: for.cond2.for.inc11_crit_edge.us: ; preds = %for.body6.us +; CHECK: %inc12.us = add nuw nsw i32 %i.027.us, 1 +; CHECK: %exitcond28 = icmp ne i32 %inc12.us, %flatten.tripcount +; CHECK: br i1 %exitcond28, label %for.body.us, label %for.end13.loopexit + +for.end13.loopexit: ; preds = %for.cond2.for.inc11_crit_edge.us + br label %for.end13 + +for.end13: ; preds = %for.end13.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %conv, %for.end13.loopexit ] + ret i32 %i.0.lcssa +} + + +; CHECK-LABEL: test3 +; Same as above, uses load to determine it can't overflow +define i32 @test3(i32 %N, i32 %val, i16* nocapture %A) local_unnamed_addr #0 { +entry: + %cmp21 = icmp eq i32 %N, 0 + br i1 %cmp21, label %for.end8, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %entry + br label %for.body.us +; CHECK: for.body.lr.ph.split.us: +; CHECK: %flatten.tripcount = mul i32 %N, %N +; CHECK: br label %for.body.us + +for.body.us: ; preds = %for.cond1.for.inc6_crit_edge.us, %for.body.lr.ph.split.us + %i.022.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc7.us, %for.cond1.for.inc6_crit_edge.us ] + %mul.us = mul i32 %i.022.us, %N + br label %for.body3.us +; CHECK: for.body.us: +; CHECK: %i.022.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc7.us, %for.cond1.for.inc6_crit_edge.us ] +; CHECK: %mul.us = mul i32 %i.022.us, %N +; CHECK: br label %for.body3.us + +for.body3.us: ; preds = %for.body.us, %for.body3.us + %j.020.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body3.us ] + %add.us = add i32 %j.020.us, %mul.us + %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %add.us + %0 = load i16, i16* %arrayidx.us, align 2 + %conv18.us = zext i16 %0 to i32 + %add4.us = add i32 %conv18.us, %val + %conv5.us = trunc i32 %add4.us to i16 + store i16 %conv5.us, i16* %arrayidx.us, align 2 + %inc.us = add nuw i32 %j.020.us, 1 + %exitcond = icmp ne i32 %inc.us, %N + br i1 %exitcond, label %for.body3.us, label %for.cond1.for.inc6_crit_edge.us +; CHECK: for.body3.us: +; CHECK: %j.020.us = phi i32 [ 0, %for.body.us ] +; CHECK: %add.us = add i32 %j.020.us, %mul.us +; CHECK: %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %i.022.us +; CHECK: %0 = load i16, i16* %arrayidx.us, align 2 +; CHECK: %conv18.us = zext i16 %0 to i32 +; CHECK: %add4.us = add i32 %conv18.us, %val +; CHECK: %conv5.us = trunc i32 %add4.us to i16 +; CHECK: store i16 %conv5.us, i16* %arrayidx.us, align 2 +; CHECK: %inc.us = add nuw i32 %j.020.us, 1 +; CHECK: %exitcond = icmp ne i32 %inc.us, %N +; CHECK: br label %for.cond1.for.inc6_crit_edge.us + +for.cond1.for.inc6_crit_edge.us: ; preds = %for.body3.us + %inc7.us = add nuw i32 %i.022.us, 1 + %exitcond23 = icmp ne i32 %inc7.us, %N + br i1 %exitcond23, label %for.body.us, label %for.end8.loopexit +; CHECK: for.cond1.for.inc6_crit_edge.us: +; CHECK: %inc7.us = add nuw i32 %i.022.us, 1 +; CHECK: %exitcond23 = icmp ne i32 %inc7.us, %flatten.tripcount +; CHECK: br i1 %exitcond23, label %for.body.us, label %for.end8.loopexit + +for.end8.loopexit: ; preds = %for.cond1.for.inc6_crit_edge.us + br label %for.end8 + +for.end8: ; preds = %for.end8.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %N, %for.end8.loopexit ] + ret i32 %i.0.lcssa +} + + +; CHECK-LABEL: test4 +; Multiplication cannot overflow, so we can replace the original loop. +define void @test4(i16 zeroext %N, i32* nocapture %C, i32* nocapture readonly %A, i32 %scale) { +entry: + %conv = zext i16 %N to i32 + %cmp30 = icmp eq i16 %N, 0 + br i1 %cmp30, label %for.cond.cleanup, label %for.body.lr.ph.split.us +; CHECK: entry: +; CHECK: %[[LIMIT:.*]] = zext i16 %N to i32 +; CHECK: br i1 %{{.*}} label %for.cond.cleanup, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %entry + br label %for.body.us +; CHECK: for.body.lr.ph.split.us: +; CHECK: %[[TRIPCOUNT:.*]] = mul i32 %[[LIMIT]], %[[LIMIT]] +; CHECK: br label %for.body.us + +for.body.us: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us, %for.body.lr.ph.split.us + %i.031.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc15.us, %for.cond2.for.cond.cleanup6_crit_edge.us ] + %mul.us = mul nuw nsw i32 %i.031.us, %conv + br label %for.body7.us +; CHECK: for.body.us: +; CHECK: %[[OUTER_IV:.*]] = phi i32 +; CHECK: br label %for.body7.us + +for.body7.us: ; preds = %for.body.us, %for.body7.us +; CHECK: for.body7.us: + %j.029.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body7.us ] + %add.us = add nuw nsw i32 %j.029.us, %mul.us + %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us +; CHECK: getelementptr inbounds i32, i32* %A, i32 %[[OUTER_IV]] + %0 = load i32, i32* %arrayidx.us, align 4 + %mul9.us = mul nsw i32 %0, %scale +; CHECK: getelementptr inbounds i32, i32* %C, i32 %[[OUTER_IV]] + %arrayidx13.us = getelementptr inbounds i32, i32* %C, i32 %add.us + store i32 %mul9.us, i32* %arrayidx13.us, align 4 + %inc.us = add nuw nsw i32 %j.029.us, 1 + %exitcond = icmp ne i32 %inc.us, %conv + br i1 %exitcond, label %for.body7.us, label %for.cond2.for.cond.cleanup6_crit_edge.us +; CHECK: br label %for.cond2.for.cond.cleanup6_crit_edge.us + +for.cond2.for.cond.cleanup6_crit_edge.us: ; preds = %for.body7.us + %inc15.us = add nuw nsw i32 %i.031.us, 1 + %exitcond32 = icmp ne i32 %inc15.us, %conv + br i1 %exitcond32, label %for.body.us, label %for.cond.cleanup.loopexit +; CHECK: for.cond2.for.cond.cleanup6_crit_edge.us: +; CHECK: br i1 %exitcond32, label %for.body.us, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: ; preds = %for.cond2.for.cond.cleanup6_crit_edge.us + br label %for.cond.cleanup +; CHECK: for.cond.cleanup.loopexit: +; CHECK: br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void +; CHECK: for.cond.cleanup: +; CHECK: ret void +} + + +; CHECK-LABEL: test5 +define i32 @test5(i8 zeroext %I, i16 zeroext %J) { +entry: + %0 = lshr i8 %I, 1 + %div = zext i8 %0 to i32 + %cmp30 = icmp eq i8 %0, 0 + br i1 %cmp30, label %for.cond.cleanup, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %1 = lshr i16 %J, 1 + %div5 = zext i16 %1 to i32 + %cmp627 = icmp eq i16 %1, 0 + br i1 %cmp627, label %for.body.lr.ph.split, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %for.body.lr.ph + br label %for.body.us +; CHECK: for.body.lr.ph.split.us: +; CHECK: %flatten.tripcount = mul i32 %div5, %div +; CHECK: br label %for.body.us + +for.body.us: ; preds = %for.cond3.for.cond.cleanup8_crit_edge.us, %for.body.lr.ph.split.us + %i.032.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc13.us, %for.cond3.for.cond.cleanup8_crit_edge.us ] + %x.031.us = phi i32 [ 1, %for.body.lr.ph.split.us ], [ %xor.us.lcssa, %for.cond3.for.cond.cleanup8_crit_edge.us ] + br label %for.body9.us +; CHECK: for.body.us: +; CHECK: %i.032.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc13.us, %for.cond3.for.cond.cleanup8_crit_edge.us ] +; CHECK: %x.031.us = phi i32 [ 1, %for.body.lr.ph.split.us ], [ %xor.us.lcssa, %for.cond3.for.cond.cleanup8_crit_edge.us ] +; CHECK: br label %for.body9.us + +for.body9.us: ; preds = %for.body.us, %for.body9.us + %j.029.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body9.us ] + %x.128.us = phi i32 [ %x.031.us, %for.body.us ], [ %xor.us, %for.body9.us ] + %call.us = tail call i32 @func(i32 1) + %sub.us = sub nsw i32 %call.us, %x.128.us + %xor.us = xor i32 %sub.us, %x.128.us + %inc.us = add nuw nsw i32 %j.029.us, 1 + %cmp6.us = icmp ult i32 %inc.us, %div5 + br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us +; CHECK: for.body9.us: +; CHECK: %j.029.us = phi i32 [ 0, %for.body.us ] +; CHECK: %x.128.us = phi i32 [ %x.031.us, %for.body.us ] +; CHECK: %call.us = tail call i32 @func(i32 1) +; CHECK: %sub.us = sub nsw i32 %call.us, %x.128.us +; CHECK: %xor.us = xor i32 %sub.us, %x.128.us +; CHECK: %inc.us = add nuw nsw i32 %j.029.us, 1 +; CHECK: %cmp6.us = icmp ult i32 %inc.us, %div5 +; CHECK: br label %for.cond3.for.cond.cleanup8_crit_edge.us + +for.cond3.for.cond.cleanup8_crit_edge.us: ; preds = %for.body9.us + %xor.us.lcssa = phi i32 [ %xor.us, %for.body9.us ] + %inc13.us = add nuw nsw i32 %i.032.us, 1 + %cmp.us = icmp ult i32 %inc13.us, %div + br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit +; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: +; CHECK: %xor.us.lcssa = phi i32 [ %xor.us, %for.body9.us ] +; CHECK: %inc13.us = add nuw nsw i32 %i.032.us, 1 +; CHECK: %cmp.us = icmp ult i32 %inc13.us, %flatten.tripcount +; CHECK: br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit + +for.body.lr.ph.split: ; preds = %for.body.lr.ph + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.cond3.for.cond.cleanup8_crit_edge.us + %xor.us.lcssa.lcssa = phi i32 [ %xor.us.lcssa, %for.cond3.for.cond.cleanup8_crit_edge.us ] + br label %for.cond.cleanup + +for.cond.cleanup.loopexit34: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit34, %for.cond.cleanup.loopexit, %entry + %x.0.lcssa = phi i32 [ 1, %entry ], [ %xor.us.lcssa.lcssa, %for.cond.cleanup.loopexit ], [ 1, %for.cond.cleanup.loopexit34 ] + ret i32 %x.0.lcssa + +for.body: ; preds = %for.body.lr.ph.split, %for.body + %i.032 = phi i32 [ 0, %for.body.lr.ph.split ], [ %inc13, %for.body ] + %inc13 = add nuw nsw i32 %i.032, 1 + %cmp = icmp ult i32 %inc13, %div + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit34 +} + + +; CHECK-LABEL: test6 +define i32 @test6(i8 zeroext %I, i16 zeroext %J) { +entry: + %0 = lshr i8 %I, 1 + %div = zext i8 %0 to i32 + %cmp30 = icmp eq i8 %0, 0 + br i1 %cmp30, label %for.cond.cleanup, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %1 = lshr i16 %J, 1 + %div5 = zext i16 %1 to i32 + %cmp627 = icmp eq i16 %1, 0 + br i1 %cmp627, label %for.body.lr.ph.split, label %for.body.lr.ph.split.us + +for.body.lr.ph.split.us: ; preds = %for.body.lr.ph + br label %for.body.us +; CHECK: for.body.lr.ph.split.us: +; CHECK: %flatten.tripcount = mul i32 %div5, %div +; CHECK: br label %for.body.us + +for.body.us: ; preds = %for.cond3.for.cond.cleanup8_crit_edge.us, %for.body.lr.ph.split.us + %i.032.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc13.us, %for.cond3.for.cond.cleanup8_crit_edge.us ] + %x.031.us = phi i32 [ 1, %for.body.lr.ph.split.us ], [ %xor.us.lcssa, %for.cond3.for.cond.cleanup8_crit_edge.us ] + %mul.us = mul nuw nsw i32 %i.032.us, %div5 + br label %for.body9.us +; CHECK: for.body.us: +; CHECK: %i.032.us = phi i32 [ 0, %for.body.lr.ph.split.us ], [ %inc13.us, %for.cond3.for.cond.cleanup8_crit_edge.us ] +; CHECK: %x.031.us = phi i32 [ 1, %for.body.lr.ph.split.us ], [ %xor.us.lcssa, %for.cond3.for.cond.cleanup8_crit_edge.us ] +; CHECK: %mul.us = mul nuw nsw i32 %i.032.us, %div5 +; CHECK: br label %for.body9.us + +for.body9.us: ; preds = %for.body.us, %for.body9.us + %j.029.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %for.body9.us ] + %x.128.us = phi i32 [ %x.031.us, %for.body.us ], [ %xor.us, %for.body9.us ] + %add.us = add nuw nsw i32 %j.029.us, %mul.us + %call.us = tail call i32 @func(i32 %add.us) + %sub.us = sub nsw i32 %call.us, %x.128.us + %xor.us = xor i32 %sub.us, %x.128.us + %inc.us = add nuw nsw i32 %j.029.us, 1 + %cmp6.us = icmp ult i32 %inc.us, %div5 + br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us +; CHECK: for.body9.us: +; CHECK: %j.029.us = phi i32 [ 0, %for.body.us ] +; CHECK: %x.128.us = phi i32 [ %x.031.us, %for.body.us ] +; CHECK: %add.us = add nuw nsw i32 %j.029.us, %mul.us +; CHECK: %call.us = tail call i32 @func(i32 %i.032.us) +; CHECK: %sub.us = sub nsw i32 %call.us, %x.128.us +; CHECK: %xor.us = xor i32 %sub.us, %x.128.us +; CHECK: %inc.us = add nuw nsw i32 %j.029.us, 1 +; CHECK: %cmp6.us = icmp ult i32 %inc.us, %div5 +; CHECK: br label %for.cond3.for.cond.cleanup8_crit_edge.us + +for.cond3.for.cond.cleanup8_crit_edge.us: ; preds = %for.body9.us + %xor.us.lcssa = phi i32 [ %xor.us, %for.body9.us ] + %inc13.us = add nuw nsw i32 %i.032.us, 1 + %cmp.us = icmp ult i32 %inc13.us, %div + br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit +; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: +; CHECK: %xor.us.lcssa = phi i32 [ %xor.us, %for.body9.us ] +; CHECK: %inc13.us = add nuw nsw i32 %i.032.us, 1 +; CHECK: %cmp.us = icmp ult i32 %inc13.us, %flatten.tripcount +; CHECK: br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit + +for.body.lr.ph.split: ; preds = %for.body.lr.ph + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.cond3.for.cond.cleanup8_crit_edge.us + %xor.us.lcssa.lcssa = phi i32 [ %xor.us.lcssa, %for.cond3.for.cond.cleanup8_crit_edge.us ] + br label %for.cond.cleanup + +for.cond.cleanup.loopexit34: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit34, %for.cond.cleanup.loopexit, %entry + %x.0.lcssa = phi i32 [ 1, %entry ], [ %xor.us.lcssa.lcssa, %for.cond.cleanup.loopexit ], [ 1, %for.cond.cleanup.loopexit34 ] + ret i32 %x.0.lcssa + +for.body: ; preds = %for.body.lr.ph.split, %for.body + %i.032 = phi i32 [ 0, %for.body.lr.ph.split ], [ %inc13, %for.body ] + %inc13 = add nuw nsw i32 %i.032, 1 + %cmp = icmp ult i32 %inc13, %div + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit34 +} + +; CHECK-LABEL: test7 +; Various inner phis and conditions which we can still work with +define signext i16 @test7(i32 %I, i32 %J, i32* nocapture readonly %C, i16 signext %limit) { +entry: + %cmp43 = icmp eq i32 %J, 0 + br i1 %cmp43, label %for.end17, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %conv = sext i16 %limit to i32 + br label %for.body.us +; CHECK: for.body.lr.ph: +; CHECK: %conv = sext i16 %limit to i32 +; CHECK: %flatten.tripcount = mul i32 %J, %J +; CHECK: br label %for.body.us + +for.body.us: ; preds = %for.cond1.for.inc15_crit_edge.us, %for.body.lr.ph + %i.047.us = phi i32 [ 0, %for.body.lr.ph ], [ %inc16.us, %for.cond1.for.inc15_crit_edge.us ] + %ret.046.us = phi i16 [ 0, %for.body.lr.ph ], [ %ret.2.us.lcssa, %for.cond1.for.inc15_crit_edge.us ] + %prev.045.us = phi i32 [ 0, %for.body.lr.ph ], [ %.lcssa, %for.cond1.for.inc15_crit_edge.us ] + %tmp.044.us = phi i32 [ 0, %for.body.lr.ph ], [ %tmp.2.us.lcssa, %for.cond1.for.inc15_crit_edge.us ] + %mul.us = mul i32 %i.047.us, %J + br label %for.body3.us +; CHECK: for.body.us: +; CHECK: %i.047.us = phi i32 [ 0, %for.body.lr.ph ], [ %inc16.us, %for.cond1.for.inc15_crit_edge.us ] +; CHECK: %ret.046.us = phi i16 [ 0, %for.body.lr.ph ], [ %ret.2.us.lcssa, %for.cond1.for.inc15_crit_edge.us ] +; CHECK: %prev.045.us = phi i32 [ 0, %for.body.lr.ph ], [ %.lcssa, %for.cond1.for.inc15_crit_edge.us ] +; CHECK: %tmp.044.us = phi i32 [ 0, %for.body.lr.ph ], [ %tmp.2.us.lcssa, %for.cond1.for.inc15_crit_edge.us ] +; CHECK: %mul.us = mul i32 %i.047.us, %J +; CHECK: br label %for.body3.us + +for.body3.us: ; preds = %for.body.us, %if.end.us + %j.040.us = phi i32 [ 0, %for.body.us ], [ %inc.us, %if.end.us ] + %ret.139.us = phi i16 [ %ret.046.us, %for.body.us ], [ %ret.2.us, %if.end.us ] + %prev.138.us = phi i32 [ %prev.045.us, %for.body.us ], [ %0, %if.end.us ] + %tmp.137.us = phi i32 [ %tmp.044.us, %for.body.us ], [ %tmp.2.us, %if.end.us ] + %add.us = add i32 %j.040.us, %mul.us + %arrayidx.us = getelementptr inbounds i32, i32* %C, i32 %add.us + %0 = load i32, i32* %arrayidx.us, align 4 + %add4.us = add nsw i32 %0, %tmp.137.us + %cmp5.us = icmp sgt i32 %add4.us, %conv + br i1 %cmp5.us, label %if.then.us, label %if.else.us +; CHECK: for.body3.us: +; CHECK: %j.040.us = phi i32 [ 0, %for.body.us ] +; CHECK: %ret.139.us = phi i16 [ %ret.046.us, %for.body.us ] +; CHECK: %prev.138.us = phi i32 [ %prev.045.us, %for.body.us ] +; CHECK: %tmp.137.us = phi i32 [ %tmp.044.us, %for.body.us ] +; CHECK: %add.us = add i32 %j.040.us, %mul.us +; CHECK: %arrayidx.us = getelementptr inbounds i32, i32* %C, i32 %i.047.us +; CHECK: %0 = load i32, i32* %arrayidx.us, align 4 +; CHECK: %add4.us = add nsw i32 %0, %tmp.137.us +; CHECK: %cmp5.us = icmp sgt i32 %add4.us, %conv +; CHECK: br i1 %cmp5.us, label %if.then.us, label %if.else.us + +if.else.us: ; preds = %for.body3.us + %cmp10.us = icmp sgt i32 %0, %prev.138.us + %cond.us = zext i1 %cmp10.us to i32 + %conv1235.us = zext i16 %ret.139.us to i32 + %add13.us = add nuw nsw i32 %cond.us, %conv1235.us + br label %if.end.us +; CHECK: if.else.us: +; CHECK: %cmp10.us = icmp sgt i32 %0, %prev.138.us +; CHECK: %cond.us = zext i1 %cmp10.us to i32 +; CHECK: %conv1235.us = zext i16 %ret.139.us to i32 +; CHECK: %add13.us = add nuw nsw i32 %cond.us, %conv1235.us +; CHECK: br label %if.end.us + +if.then.us: ; preds = %for.body3.us + %conv7.us = sext i16 %ret.139.us to i32 + %add8.us = add nsw i32 %conv7.us, 10 + br label %if.end.us +; CHECK: if.then.us: +; CHECK: %conv7.us = sext i16 %ret.139.us to i32 +; CHECK: %add8.us = add nsw i32 %conv7.us, 10 +; CHECK: br label %if.end.us + +if.end.us: ; preds = %if.then.us, %if.else.us + %tmp.2.us = phi i32 [ 0, %if.then.us ], [ %add4.us, %if.else.us ] + %ret.2.in.us = phi i32 [ %add8.us, %if.then.us ], [ %add13.us, %if.else.us ] + %ret.2.us = trunc i32 %ret.2.in.us to i16 + %inc.us = add nuw i32 %j.040.us, 1 + %exitcond = icmp ne i32 %inc.us, %J + br i1 %exitcond, label %for.body3.us, label %for.cond1.for.inc15_crit_edge.us +; CHECK: if.end.us: +; CHECK: %tmp.2.us = phi i32 [ 0, %if.then.us ], [ %add4.us, %if.else.us ] +; CHECK: %ret.2.in.us = phi i32 [ %add8.us, %if.then.us ], [ %add13.us, %if.else.us ] +; CHECK: %ret.2.us = trunc i32 %ret.2.in.us to i16 +; CHECK: %inc.us = add nuw i32 %j.040.us, 1 +; CHECK: %exitcond = icmp ne i32 %inc.us, %J +; CHECK: br label %for.cond1.for.inc15_crit_edge.us + +for.cond1.for.inc15_crit_edge.us: ; preds = %if.end.us + %tmp.2.us.lcssa = phi i32 [ %tmp.2.us, %if.end.us ] + %ret.2.us.lcssa = phi i16 [ %ret.2.us, %if.end.us ] + %.lcssa = phi i32 [ %0, %if.end.us ] + %inc16.us = add nuw i32 %i.047.us, 1 + %exitcond49 = icmp ne i32 %inc16.us, %J + br i1 %exitcond49, label %for.body.us, label %for.end17.loopexit +; CHECK: for.cond1.for.inc15_crit_edge.us: +; CHECK: %tmp.2.us.lcssa = phi i32 [ %tmp.2.us, %if.end.us ] +; CHECK: %ret.2.us.lcssa = phi i16 [ %ret.2.us, %if.end.us ] +; CHECK: %.lcssa = phi i32 [ %0, %if.end.us ] +; CHECK: %inc16.us = add nuw i32 %i.047.us, 1 +; CHECK: %exitcond49 = icmp ne i32 %inc16.us, %flatten.tripcount +; CHECK: br i1 %exitcond49, label %for.body.us, label %for.end17.loopexit + +for.end17.loopexit: ; preds = %for.cond1.for.inc15_crit_edge.us + %ret.2.us.lcssa.lcssa = phi i16 [ %ret.2.us.lcssa, %for.cond1.for.inc15_crit_edge.us ] + br label %for.end17 + +for.end17: ; preds = %for.end17.loopexit, %entry + %ret.0.lcssa = phi i16 [ 0, %entry ], [ %ret.2.us.lcssa.lcssa, %for.end17.loopexit ] + ret i16 %ret.0.lcssa +} + +; CHECK-LABEL: test8 +; Same as test1, but with different continue block order +; (uses icmp eq and loops on false) +define i32 @test8(i32 %val, i16* nocapture %A) { +entry: + br label %for.body +; CHECK: entry: +; CHECK: %flatten.tripcount = mul i32 20, 10 +; CHECK: br label %for.body + +for.body: ; preds = %entry, %for.inc6 + %i.018 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %mul = mul nuw nsw i32 %i.018, 20 + br label %for.body3 +; CHECK: for.body: +; CHECK: %i.018 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] +; CHECK: %mul = mul nuw nsw i32 %i.018, 20 +; CHECK: br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ] + %add = add nuw nsw i32 %j.017, %mul + %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add + %0 = load i16, i16* %arrayidx, align 2 + %conv16 = zext i16 %0 to i32 + %add4 = add i32 %conv16, %val + %conv5 = trunc i32 %add4 to i16 + store i16 %conv5, i16* %arrayidx, align 2 + %inc = add nuw nsw i32 %j.017, 1 + %exitcond = icmp eq i32 %inc, 20 + br i1 %exitcond, label %for.inc6, label %for.body3 +; CHECK: for.body3: +; CHECK: %j.017 = phi i32 [ 0, %for.body ] +; CHECK: %add = add nuw nsw i32 %j.017, %mul +; CHECK: %arrayidx = getelementptr inbounds i16, i16* %A, i32 %i.018 +; CHECK: %0 = load i16, i16* %arrayidx, align 2 +; CHECK: %conv16 = zext i16 %0 to i32 +; CHECK: %add4 = add i32 %conv16, %val +; CHECK: %conv5 = trunc i32 %add4 to i16 +; CHECK: store i16 %conv5, i16* %arrayidx, align 2 +; CHECK: %inc = add nuw nsw i32 %j.017, 1 +; CHECK: %exitcond = icmp eq i32 %inc, 20 +; CHECK: br label %for.inc6 + +for.inc6: ; preds = %for.body3 + %inc7 = add nuw nsw i32 %i.018, 1 + %exitcond19 = icmp eq i32 %inc7, 10 + br i1 %exitcond19, label %for.end8, label %for.body +; CHECK: for.inc6: +; CHECK: %inc7 = add nuw nsw i32 %i.018, 1 +; CHECK: %exitcond19 = icmp eq i32 %inc7, %flatten.tripcount +; CHECK: br i1 %exitcond19, label %for.end8, label %for.body + +for.end8: ; preds = %for.inc6 + ret i32 10 +} + + +declare i32 @func(i32) + diff --git a/llvm/test/Transforms/LoopFlatten/pr40581.ll b/llvm/test/Transforms/LoopFlatten/pr40581.ll new file mode 100644 index 0000000000000..0ee956a87e916 --- /dev/null +++ b/llvm/test/Transforms/LoopFlatten/pr40581.ll @@ -0,0 +1,108 @@ +; RUN: opt < %s -S -loop-flatten -verify-loop-info -verify-dom-info -verify-scev -verify | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; Test case and IR corresponding to this code: +; +; int k = 0; +; for(int i = 0; i < n; i++) +; for(int j = 0; j < n; j++) { +; A[k] = B[k]; +; k++; +; } +; +; TODO: this case doesn't trigger yet. +; +define dso_local void @v0(i32 %n, i32* nocapture %A, i32* nocapture readonly %B) local_unnamed_addr #0 { +; +; CHECK-LABEL: @v0 +; CHECK-NOT: %flatten.tripcount = mul i32 %n, %n +; +entry: + %cmp21 = icmp sgt i32 %n, 0 + br i1 %cmp21, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup + +for.cond1.preheader.us.preheader: + br label %for.cond1.preheader.us + +for.cond1.preheader.us: + %i.023.us = phi i32 [ %inc8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + %k.022.us = phi i32 [ %inc.us.lcssa, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + %0 = add i32 %n, %k.022.us + br label %for.body4.us + +for.body4.us: + %k.119.us = phi i32 [ %k.022.us, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] + %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %k.119.us + %1 = load i32, i32* %arrayidx.us, align 4 + %arrayidx5.us = getelementptr inbounds i32, i32* %A, i32 %k.119.us + store i32 %1, i32* %arrayidx5.us, align 4 + %inc.us = add i32 %k.119.us, 1 + %exitcond = icmp ne i32 %inc.us, %0 + br i1 %exitcond, label %for.body4.us, label %for.cond1.for.cond.cleanup3_crit_edge.us + +for.cond1.for.cond.cleanup3_crit_edge.us: + %inc.us.lcssa = phi i32 [ %inc.us, %for.body4.us ] + %inc8.us = add nuw nsw i32 %i.023.us, 1 + %cmp.us = icmp slt i32 %inc8.us, %n + br i1 %cmp.us, label %for.cond1.preheader.us, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +; Test case and IR corresponding to this code: +; +; for(int i = 0; i < n; i++) +; for(int j = 0; j < n; j++) { +; int k = i*n+j; +; A[k] = B[k]; +; k++; +; } +; +define dso_local void @v1(i32 %n, i32* nocapture %A, i32* nocapture readonly %B) local_unnamed_addr #0 { +; +; CHECK-LABEL: @v1 +; CHECK: for.cond1.preheader.us.preheader: +; CHECK: %flatten.tripcount = mul i32 %n, %n +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: +; CHECK: %inc8.us = add nuw nsw i32 %i.024.us, 1 +; CHECK: %cmp.us = icmp slt i32 %inc8.us, %flatten.tripcount +; +entry: + %cmp23 = icmp sgt i32 %n, 0 + br i1 %cmp23, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup + +for.cond1.preheader.us.preheader: + br label %for.cond1.preheader.us + +for.cond1.preheader.us: + %i.024.us = phi i32 [ %inc8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + %mul.us = mul nsw i32 %i.024.us, %n + br label %for.body4.us + +for.body4.us: + %j.022.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc6.us, %for.body4.us ] + %add.us = add nsw i32 %j.022.us, %mul.us + %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %add.us + %0 = load i32, i32* %arrayidx.us, align 4 + %arrayidx5.us = getelementptr inbounds i32, i32* %A, i32 %add.us + store i32 %0, i32* %arrayidx5.us, align 4 + %inc6.us = add nuw nsw i32 %j.022.us, 1 + %exitcond = icmp ne i32 %inc6.us, %n + br i1 %exitcond, label %for.body4.us, label %for.cond1.for.cond.cleanup3_crit_edge.us + +for.cond1.for.cond.cleanup3_crit_edge.us: + %inc8.us = add nuw nsw i32 %i.024.us, 1 + %cmp.us = icmp slt i32 %inc8.us, %n + br i1 %cmp.us, label %for.cond1.preheader.us, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} From 5101e7e8dd01719f9161e01e2f053c9797c247a8 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 1 Oct 2020 12:55:59 +0000 Subject: [PATCH 265/544] [gn build] Port d53b4bee0cc --- llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn index 9d4c7a06c9402..2fe47a4b73bee 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -38,6 +38,7 @@ static_library("Scalar") { "LoopDataPrefetch.cpp", "LoopDeletion.cpp", "LoopDistribute.cpp", + "LoopFlatten.cpp", "LoopFuse.cpp", "LoopIdiomRecognize.cpp", "LoopInstSimplify.cpp", From a20168d0307860047ad7c8a2074f98fc25b057c2 Mon Sep 17 00:00:00 2001 From: James Henderson Date: Fri, 25 Sep 2020 10:21:39 +0100 Subject: [PATCH 266/544] [Archive] Don't throw away errors for malformed archive members When adding an archive member with a problem, e.g. a new bitcode with an old archiver, containing an unsupported attribute, or an ELF file with a malformed symbol table, the archiver would throw away the error and simply add the member to the archive without any symbol entries. This meant that the resultant archive could be silently unusable when not using --whole-archive, and result in unexpected undefined symbols. This change fixes this issue by addressing two FIXMEs and only throwing away not-an-object errors. However, this meant that some LLD tests which didn't need symbol tables and were using invalid members deliberately to test the linker's malformed input handling no longer worked, so this patch also stops the archiver from looking for symbols in an object if it doesn't require a symbol table, and updates the tests accordingly. Differential Revision: https://reviews.llvm.org/D88288 Reviewed by: grimar, rupprecht, MaskRay --- lld/test/ELF/invalid/data-encoding.test | 2 +- lld/test/ELF/invalid/invalid-file-class.test | 2 +- llvm/include/llvm/Object/SymbolicFile.h | 2 + llvm/lib/Object/ArchiveWriter.cpp | 42 ++++++++------- llvm/lib/Object/SymbolicFile.cpp | 53 ++++++++++++++----- .../test/Object/archive-malformed-object.test | 38 +++++++++++++ .../test/Object/archive-unknown-filetype.test | 11 ++++ 7 files changed, 116 insertions(+), 34 deletions(-) create mode 100644 llvm/test/Object/archive-malformed-object.test create mode 100644 llvm/test/Object/archive-unknown-filetype.test diff --git a/lld/test/ELF/invalid/data-encoding.test b/lld/test/ELF/invalid/data-encoding.test index 5f0550f4dbb2f..94862af79c3cf 100644 --- a/lld/test/ELF/invalid/data-encoding.test +++ b/lld/test/ELF/invalid/data-encoding.test @@ -4,7 +4,7 @@ # Check we report this. # RUN: yaml2obj %s -o %t.o -# RUN: llvm-ar rcs %t.a %t.o +# RUN: llvm-ar rcS %t.a %t.o # RUN: not ld.lld --whole-archive %t.a -o /dev/null 2>&1 | FileCheck %s # CHECK: {{.*}}.a({{.*}}.o): corrupted ELF file: invalid data encoding diff --git a/lld/test/ELF/invalid/invalid-file-class.test b/lld/test/ELF/invalid/invalid-file-class.test index 1b40d69181b0d..3f547861b3793 100644 --- a/lld/test/ELF/invalid/invalid-file-class.test +++ b/lld/test/ELF/invalid/invalid-file-class.test @@ -11,7 +11,7 @@ ## EV_CURRENT(1), ELFOSABI_LINUX(3), , ET_REL(1), EM_NONE(0) # RUN: echo -e -n "\x7f\x45\x4c\x46\x00\x01\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00" > %t/invalid.o -# RUN: llvm-ar --format=gnu cr %t/invalid-class.a %t/invalid.o +# RUN: llvm-ar --format=gnu crS %t/invalid-class.a %t/invalid.o # RUN: not ld.lld -whole-archive %t/invalid-class.a -o /dev/null 2>&1 | FileCheck %s # CHECK: invalid-class.a(invalid.o): corrupted ELF file: invalid file class diff --git a/llvm/include/llvm/Object/SymbolicFile.h b/llvm/include/llvm/Object/SymbolicFile.h index a0d8b7225598b..5c964615e9d30 100644 --- a/llvm/include/llvm/Object/SymbolicFile.h +++ b/llvm/include/llvm/Object/SymbolicFile.h @@ -173,6 +173,8 @@ class SymbolicFile : public Binary { static bool classof(const Binary *v) { return v->isSymbolic(); } + + static bool isSymbolicFile(file_magic Type, const LLVMContext *Context); }; inline BasicSymbolRef::BasicSymbolRef(DataRefImpl SymbolP, diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index ca8ffa7706b0e..08f1b85f22597 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -359,22 +359,21 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { // reference to it, thus SymbolicFile should be destroyed first. LLVMContext Context; std::unique_ptr Obj; - if (identify_magic(Buf.getBuffer()) == file_magic::bitcode) { + + const file_magic Type = identify_magic(Buf.getBuffer()); + // Treat unsupported file types as having no symbols. + if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) + return Ret; + if (Type == file_magic::bitcode) { auto ObjOrErr = object::SymbolicFile::createSymbolicFile( Buf, file_magic::bitcode, &Context); - if (!ObjOrErr) { - // FIXME: check only for "not an object file" errors. - consumeError(ObjOrErr.takeError()); - return Ret; - } + if (!ObjOrErr) + return ObjOrErr.takeError(); Obj = std::move(*ObjOrErr); } else { auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); - if (!ObjOrErr) { - // FIXME: check only for "not an object file" errors. - consumeError(ObjOrErr.takeError()); - return Ret; - } + if (!ObjOrErr) + return ObjOrErr.takeError(); Obj = std::move(*ObjOrErr); } @@ -393,7 +392,7 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { static Expected> computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, bool Deterministic, - ArrayRef NewMembers) { + bool NeedSymbols, ArrayRef NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; // This ignores the symbol table, but we only need the value mod 8 and the @@ -494,13 +493,17 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, ModTime, Size); Out.flush(); - Expected> Symbols = - getSymbols(Buf, SymNames, HasObject); - if (auto E = Symbols.takeError()) - return std::move(E); + std::vector Symbols; + if (NeedSymbols) { + Expected> SymbolsOrErr = + getSymbols(Buf, SymNames, HasObject); + if (auto E = SymbolsOrErr.takeError()) + return std::move(E); + Symbols = std::move(*SymbolsOrErr); + } Pos += Header.size() + Data.size() + Padding.size(); - Ret.push_back({std::move(*Symbols), std::move(Header), Data, Padding}); + Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding}); } // If there are no symbols, emit an empty symbol table, to satisfy Solaris // tools, older versions of which expect a symbol table in a non-empty @@ -564,8 +567,9 @@ static Error writeArchiveToStream(raw_ostream &Out, SmallString<0> StringTableBuf; raw_svector_ostream StringTable(StringTableBuf); - Expected> DataOrErr = computeMemberData( - StringTable, SymNames, Kind, Thin, Deterministic, NewMembers); + Expected> DataOrErr = + computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic, + WriteSymtab, NewMembers); if (Error E = DataOrErr.takeError()) return E; std::vector &Data = *DataOrErr; diff --git a/llvm/lib/Object/SymbolicFile.cpp b/llvm/lib/Object/SymbolicFile.cpp index 3db4ad9ed14bd..72014fd34a4d3 100644 --- a/llvm/lib/Object/SymbolicFile.cpp +++ b/llvm/lib/Object/SymbolicFile.cpp @@ -41,20 +41,14 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, if (Type == file_magic::unknown) Type = identify_magic(Data); + if (!isSymbolicFile(Type, Context)) + return errorCodeToError(object_error::invalid_file_type); + switch (Type) { case file_magic::bitcode: - if (Context) - return IRObjectFile::create(Object, *Context); - LLVM_FALLTHROUGH; - case file_magic::unknown: - case file_magic::archive: - case file_magic::coff_cl_gl_object: - case file_magic::macho_universal_binary: - case file_magic::windows_resource: - case file_magic::pdb: - case file_magic::minidump: - case file_magic::tapi_file: - return errorCodeToError(object_error::invalid_file_type); + // Context is guaranteed to be non-null here, because bitcode magic only + // indicates a symbolic file when Context is non-null. + return IRObjectFile::create(Object, *Context); case file_magic::elf: case file_magic::elf_executable: case file_magic::elf_shared_object: @@ -95,6 +89,39 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), *Context); } + default: + llvm_unreachable("Unexpected Binary File Type"); + } +} + +bool SymbolicFile::isSymbolicFile(file_magic Type, const LLVMContext *Context) { + switch (Type) { + case file_magic::bitcode: + return Context != nullptr; + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::xcoff_object_32: + case file_magic::xcoff_object_64: + case file_magic::wasm_object: + case file_magic::coff_import_library: + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: + return true; + default: + return false; } - llvm_unreachable("Unexpected Binary File Type"); } diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test new file mode 100644 index 0000000000000..7d502c3f301d6 --- /dev/null +++ b/llvm/test/Object/archive-malformed-object.test @@ -0,0 +1,38 @@ +## Show that the archive library emits error messages when adding malformed +## objects. + +# RUN: rm -rf %t.dir +# RUN: split-file %s %t.dir +# RUN: cd %t.dir + +## Malformed bitcode object. +# RUN: llvm-as input.ll -o input.bc +# RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)" +# RUN: not llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1 + +# ERR1: error: bad.a: Invalid bitcode signature + +## Non-bitcode malformed file. +# RUN: yaml2obj input.yaml -o input.o +# RUN: not llvm-ar rc bad.a input.o 2>&1 | FileCheck %s --check-prefix=ERR2 + +# ERR2: error: bad.a: section header table goes past the end of the file: e_shoff = 0x9999 + +## Don't emit an error if the symbol table is not required. +# RUN: llvm-ar rcS good.a input.o input.bc +# RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS + +# CONTENTS: input.o +# CONTENTS-NEXT: input.bc + +#--- input.ll +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux" + +#--- input.yaml +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + EShOff: 0x9999 diff --git a/llvm/test/Object/archive-unknown-filetype.test b/llvm/test/Object/archive-unknown-filetype.test new file mode 100644 index 0000000000000..5647501c51fc1 --- /dev/null +++ b/llvm/test/Object/archive-unknown-filetype.test @@ -0,0 +1,11 @@ +## Show that the archive library does not emit an error or add any symbols to +## the archive symbol table, when it encounters an unknown file type, but still +## adds the file to the archive. + +# RUN: echo something > %t +# RUN: rm -f %t.a +# RUN: llvm-ar rc %t.a %t +# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=CONTENTS -DFILE=%basename_t +# RUN: llvm-nm --print-armap %t.a | FileCheck %s --allow-empty --implicit-check-not={{.}} + +# CONTENTS: [[FILE]] From 15474d769110139f9cc96d42434988d7aaa77634 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 30 Sep 2020 14:18:03 +0100 Subject: [PATCH 267/544] [SVE][CodeGen] Replace use of TypeSize operator< in GlobalMerge::doMerge We don't support global variables with scalable vector types so I've changed the code to compare the fixed sizes instead. Differential Revision: https://reviews.llvm.org/D88564 --- llvm/lib/CodeGen/GlobalMerge.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 1e20c02ba160e..6c1ce4c1efb04 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -223,8 +223,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, // FIXME: Find better heuristics llvm::stable_sort( Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { - return DL.getTypeAllocSize(GV1->getValueType()) < - DL.getTypeAllocSize(GV2->getValueType()); + // We don't support scalable global variables. + return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() < + DL.getTypeAllocSize(GV2->getValueType()).getFixedSize(); }); // If we want to just blindly group all globals together, do so. From 866d9b03f2902c177533d7ce148339d47bf092e1 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 1 Oct 2020 14:21:59 +0100 Subject: [PATCH 268/544] [AMDGPU] Tiny cleanup in isLegalFLATOffset. NFC. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 344d87f5443df..8f67c2a0bbdf8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6916,13 +6916,10 @@ bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) return false; - if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { - return (Signed && isInt<12>(Offset)) || - (!Signed && isUInt<11>(Offset)); - } + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) + return Signed ? isInt<12>(Offset) : isUInt<11>(Offset); - return (Signed && isInt<13>(Offset)) || - (!Signed && isUInt<12>(Offset)); + return Signed ? isInt<13>(Offset) :isUInt<12>(Offset); } From 95a440b936c26b97eb47d691df551d21ce8c13d3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Oct 2020 14:36:42 +0100 Subject: [PATCH 269/544] [IR] PatternMatch - add m_FShl/m_FShr funnel shift intrinsic matchers. NFCI. --- llvm/include/llvm/IR/PatternMatch.h | 12 ++++++++++++ llvm/lib/Analysis/InstructionSimplify.cpp | 15 +++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 78a311de06575..000a3af4cc2cf 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -2015,6 +2015,18 @@ inline typename m_Intrinsic_Ty::Ty m_FMax(const Opnd0 &Op0, return m_Intrinsic(Op0, Op1); } +template +inline typename m_Intrinsic_Ty::Ty +m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { + return m_Intrinsic(Op0, Op1, Op2); +} + +template +inline typename m_Intrinsic_Ty::Ty +m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { + return m_Intrinsic(Op0, Op1, Op2); +} + //===----------------------------------------------------------------------===// // Matchers for two-operands operators with the operators in either order // diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 7645cc93545cc..c13966169eeba 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3963,10 +3963,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // Test for a bogus zero-shift-guard-op around funnel-shift or rotate. Value *ShAmt; - auto isFsh = m_CombineOr(m_Intrinsic(m_Value(X), m_Value(), - m_Value(ShAmt)), - m_Intrinsic(m_Value(), m_Value(X), - m_Value(ShAmt))); + auto isFsh = m_CombineOr(m_FShl(m_Value(X), m_Value(), m_Value(ShAmt)), + m_FShr(m_Value(), m_Value(X), m_Value(ShAmt))); // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt) @@ -3977,12 +3975,9 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, // intrinsics do not have that problem. // We do not allow this transform for the general funnel shift case because // that would not preserve the poison safety of the original code. - auto isRotate = m_CombineOr(m_Intrinsic(m_Value(X), - m_Deferred(X), - m_Value(ShAmt)), - m_Intrinsic(m_Value(X), - m_Deferred(X), - m_Value(ShAmt))); + auto isRotate = + m_CombineOr(m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)), + m_FShr(m_Value(X), m_Deferred(X), m_Value(ShAmt))); // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt) // (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt) if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt && From 567049f89282d10ec2e82ea21e239fb0174a0ee1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Oct 2020 14:42:16 +0100 Subject: [PATCH 270/544] [InstCombine] Use m_FAbs matcher helper. NFCI. --- .../Transforms/InstCombine/InstCombineCompares.cpp | 2 +- .../Transforms/InstCombine/InstCombineMulDivRem.cpp | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index a0b4c2216cca9..7a7de4db80330 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6089,7 +6089,7 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI, /// Optimize fabs(X) compared with zero. static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { Value *X; - if (!match(I.getOperand(0), m_Intrinsic(m_Value(X))) || + if (!match(I.getOperand(0), m_FAbs(m_Value(X))) || !match(I.getOperand(1), m_PosZeroFP())) return nullptr; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 245fd588a5231..2f94b46e01d5d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -432,13 +432,12 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) { // fabs(X) * fabs(X) -> X * X // fabs(X) / fabs(X) -> X / X - if (Op0 == Op1 && match(Op0, m_Intrinsic(m_Value(X)))) + if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X)))) return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I); // fabs(X) * fabs(Y) --> fabs(X * Y) // fabs(X) / fabs(Y) --> fabs(X / Y) - if (match(Op0, m_Intrinsic(m_Value(X))) && - match(Op1, m_Intrinsic(m_Value(Y))) && + if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) && (Op0->hasOneUse() || Op1->hasOneUse())) { IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.setFastMathFlags(I.getFastMathFlags()); @@ -1393,10 +1392,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { // X / fabs(X) -> copysign(1.0, X) // fabs(X) / X -> copysign(1.0, X) if (I.hasNoNaNs() && I.hasNoInfs() && - (match(&I, - m_FDiv(m_Value(X), m_Intrinsic(m_Deferred(X)))) || - match(&I, m_FDiv(m_Intrinsic(m_Value(X)), - m_Deferred(X))))) { + (match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) || + match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) { Value *V = Builder.CreateBinaryIntrinsic( Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I); return replaceInstUsesWith(I, V); From 5665ec4e182dba9965847d3698ad64a950bb00a7 Mon Sep 17 00:00:00 2001 From: David Tenty Date: Wed, 30 Sep 2020 11:13:20 -0400 Subject: [PATCH 271/544] [compiler-rt][cmake][powerpc] Remove TEST_BIG_ENDIAN from base-config-ix.cmake It's actually not safe to call TEST_BIG_ENDIAN here, since we may be running from the builtins build (i.e builtins-config-ix) context where TEST_COMPILE_ONLY is set since without builtins already built we may fail to link, and TEST_BIG_ENDIAN internally performs tests which may fail to link without builtins. Fortunately powerpc is the only target that uses this information here and we actually already know the whether we are targeting the LE variant due to earlier macro checks, so we can simply this to remove our reliance on TEST_BIG_ENDIAN. Reviewed By: hubert.reinterpretcast, Whitney Differential Revision: https://reviews.llvm.org/D88608 --- compiler-rt/cmake/base-config-ix.cmake | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index e7f951064bf9b..a4c29d2943cae 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -5,7 +5,6 @@ include(CheckIncludeFile) include(CheckCXXSourceCompiles) -include(TestBigEndian) check_include_file(unwind.h HAVE_UNWIND_H) @@ -188,22 +187,13 @@ macro(test_targets) test_target_arch(x86_64 "" "") endif() endif() + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le") + test_target_arch(powerpc64le "" "-m64") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc") - # Strip out -nodefaultlibs when calling TEST_BIG_ENDIAN. Configuration - # will fail with this option when building with a sanitizer. - cmake_push_check_state() - string(REPLACE "-nodefaultlibs" "" CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - TEST_BIG_ENDIAN(HOST_IS_BIG_ENDIAN) - cmake_pop_check_state() - - if(HOST_IS_BIG_ENDIAN) - if(CMAKE_SYSTEM_NAME MATCHES "AIX") - test_target_arch(powerpc "" "-m32") - endif() - test_target_arch(powerpc64 "" "-m64") - else() - test_target_arch(powerpc64le "" "-m64") + if(CMAKE_SYSTEM_NAME MATCHES "AIX") + test_target_arch(powerpc "" "-m32") endif() + test_target_arch(powerpc64 "" "-m64") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "s390x") test_target_arch(s390x "" "") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "sparc") From 79fbcbff41734e3d07e6200d33c3e40732dfae6a Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 25 Sep 2020 22:25:03 +0200 Subject: [PATCH 272/544] [clangd] clangd --check: standalone diagnosis of common problems This is a tool to simply parse a file as clangd would, and run some common features (code actions, go-to-definition, hover) in an attempt to trigger or reproduce crashes, error diagnostics, etc. This is easier and more predictable than loading the file in clangd, because: - there's no editor/plugin variation to worry about - there's no accidental variation of user behavior or other extraneous requests - we trigger features at every token, rather than guessing - everything is synchronoous, logs are easier to reason about - it's easier to (get users to) capture logs when running on the command-line This is a fairly lightweight variant of this idea. We could do a lot more with it, and maybe we should. But I can't in the near future, and experience will tell us if we made the right tradeoffs and if it's worth investing further. Differential Revision: https://reviews.llvm.org/D88338 --- clang-tools-extra/clangd/test/check-fail.test | 13 + clang-tools-extra/clangd/test/check.test | 13 + clang-tools-extra/clangd/tool/CMakeLists.txt | 1 + clang-tools-extra/clangd/tool/Check.cpp | 258 ++++++++++++++++++ clang-tools-extra/clangd/tool/ClangdMain.cpp | 33 ++- 5 files changed, 315 insertions(+), 3 deletions(-) create mode 100644 clang-tools-extra/clangd/test/check-fail.test create mode 100644 clang-tools-extra/clangd/test/check.test create mode 100644 clang-tools-extra/clangd/tool/Check.cpp diff --git a/clang-tools-extra/clangd/test/check-fail.test b/clang-tools-extra/clangd/test/check-fail.test new file mode 100644 index 0000000000000..7462ce5ecf5f3 --- /dev/null +++ b/clang-tools-extra/clangd/test/check-fail.test @@ -0,0 +1,13 @@ +// RUN: not clangd -check=%s 2>&1 | FileCheck -strict-whitespace %s + +// CHECK: Testing on source file {{.*}}check-fail.test +// CHECK: internal (cc1) args are: -cc1 +// CHECK: Building preamble... +// CHECK: [pp_file_not_found] Line {{.*}}: 'missing.h' file not found +// CHECK: Building AST... +// CHECK: Testing features at each token +// CHECK: tweak: ExpandAutoType ==> FAIL +// CHECK: All checks completed, 2 errors + +#include "missing.h" +auto x = []{}; diff --git a/clang-tools-extra/clangd/test/check.test b/clang-tools-extra/clangd/test/check.test new file mode 100644 index 0000000000000..832629ce29ef8 --- /dev/null +++ b/clang-tools-extra/clangd/test/check.test @@ -0,0 +1,13 @@ +# RUN: clangd -log=verbose -check 2>&1 | FileCheck -strict-whitespace %s + +CHECK: Testing on source file {{.*}}test.cc +CHECK: internal (cc1) args are: -cc1 +CHECK: Building preamble... +CHECK: Built preamble +CHECK: Building AST... +CHECK: Testing features at each token +CHECK-DAG: hover: false +CHECK-DAG: hover: true +CHECK-DAG: tweak: AddUsing +CHECK: All checks completed, 0 errors + diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index 670e5a17013ab..65e0aa35f2654 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -3,6 +3,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) add_clang_tool(clangd ClangdMain.cpp + Check.cpp $ ) diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp new file mode 100644 index 0000000000000..14ee0fdec9c91 --- /dev/null +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -0,0 +1,258 @@ +//===--- Check.cpp - clangd self-diagnostics ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Many basic problems can occur processing a file in clangd, e.g.: +// - system includes are not found +// - crash when indexing its AST +// clangd --check provides a simplified, isolated way to reproduce these, +// with no editor, LSP, threads, background indexing etc to contend with. +// +// One important use case is gathering information for bug reports. +// Another is reproducing crashes, and checking which setting prevent them. +// +// It simulates opening a file (determining compile command, parsing, indexing) +// and then running features at many locations. +// +// Currently it adds some basic logging of progress and results. +// We should consider extending it to also recognize common symptoms and +// recommend solutions (e.g. standard library installation issues). +// +//===----------------------------------------------------------------------===// + +#include "ClangdLSPServer.h" +#include "CodeComplete.h" +#include "GlobalCompilationDatabase.h" +#include "Hover.h" +#include "ParsedAST.h" +#include "Preamble.h" +#include "SourceCode.h" +#include "XRefs.h" +#include "index/CanonicalIncludes.h" +#include "index/FileIndex.h" +#include "refactor/Tweak.h" +#include "support/ThreadsafeFS.h" +#include "clang/AST/ASTContext.h" +#include "clang/Basic/DiagnosticIDs.h" +#include "clang/Format/Format.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Path.h" + +namespace clang { +namespace clangd { +namespace { + +// Print (and count) the error-level diagnostics (warnings are ignored). +unsigned showErrors(llvm::ArrayRef Diags) { + unsigned ErrCount = 0; + for (const auto &D : Diags) { + if (D.Severity >= DiagnosticsEngine::Error) { + elog("[{0}] Line {1}: {2}", D.Name, D.Range.start.line + 1, D.Message); + ++ErrCount; + } + } + return ErrCount; +} + +// This class is just a linear pipeline whose functions get called in sequence. +// Each exercises part of clangd's logic on our test file and logs results. +// Later steps depend on state built in earlier ones (such as the AST). +// Many steps can fatally fail (return false), then subsequent ones cannot run. +// Nonfatal failures are logged and tracked in ErrCount. +class Checker { + // from constructor + std::string File; + ClangdLSPServer::Options Opts; + // from buildCommand + tooling::CompileCommand Cmd; + // from buildInvocation + ParseInputs Inputs; + std::unique_ptr Invocation; + format::FormatStyle Style; + // from buildAST + std::shared_ptr Preamble; + llvm::Optional AST; + FileIndex Index; + +public: + // Number of non-fatal errors seen. + unsigned ErrCount = 0; + + Checker(llvm::StringRef File, const ClangdLSPServer::Options &Opts) + : File(File), Opts(Opts) {} + + // Read compilation database and choose a compile command for the file. + bool buildCommand() { + log("Loading compilation database..."); + std::unique_ptr BaseCDB = + std::make_unique( + Opts.CompileCommandsDir); + BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), + std::move(BaseCDB)); + auto Mangler = CommandMangler::detect(); + if (Opts.ResourceDir) + Mangler.ResourceDir = *Opts.ResourceDir; + auto CDB = std::make_unique( + BaseCDB.get(), std::vector{}, + tooling::ArgumentsAdjuster(std::move(Mangler))); + + if (auto TrueCmd = CDB->getCompileCommand(File)) { + Cmd = std::move(*TrueCmd); + log("Compile command from CDB is: {0}", llvm::join(Cmd.CommandLine, " ")); + } else { + Cmd = CDB->getFallbackCommand(File); + log("Generic fallback command is: {0}", llvm::join(Cmd.CommandLine, " ")); + } + + return true; + } + + // Prepare inputs and build CompilerInvocation (parsed compile command). + bool buildInvocation(const ThreadsafeFS &TFS, + llvm::Optional Contents) { + StoreDiags CaptureInvocationDiags; + std::vector CC1Args; + Inputs.CompileCommand = Cmd; + Inputs.TFS = &TFS; + if (Contents.hasValue()) { + Inputs.Contents = *Contents; + log("Imaginary source file contents:\n{0}", Inputs.Contents); + } else { + if (auto Contents = TFS.view(llvm::None)->getBufferForFile(File)) { + Inputs.Contents = Contents->get()->getBuffer().str(); + } else { + elog("Couldn't read {0}: {1}", File, Contents.getError().message()); + return false; + } + } + Inputs.Opts.ClangTidyOpts = + Opts.GetClangTidyOptions(*TFS.view(llvm::None), File); + log("Parsing command..."); + Invocation = + buildCompilerInvocation(Inputs, CaptureInvocationDiags, &CC1Args); + auto InvocationDiags = CaptureInvocationDiags.take(); + ErrCount += showErrors(InvocationDiags); + log("internal (cc1) args are: {0}", llvm::join(CC1Args, " ")); + if (!Invocation) { + elog("Failed to parse command line"); + return false; + } + + // FIXME: Check that resource-dir/built-in-headers exist? + + Style = getFormatStyleForFile(File, Inputs.Contents, TFS); + + return true; + } + + // Build preamble and AST, and index them. + bool buildAST() { + log("Building preamble..."); + Preamble = + buildPreamble(File, *Invocation, Inputs, /*StoreInMemory=*/true, + [&](ASTContext &Ctx, std::shared_ptr PP, + const CanonicalIncludes &Includes) { + if (!Opts.BuildDynamicSymbolIndex) + return; + log("Indexing headers..."); + Index.updatePreamble(File, /*Version=*/"null", Ctx, + std::move(PP), Includes); + }); + if (!Preamble) { + elog("Failed to build preamble"); + return false; + } + ErrCount += showErrors(Preamble->Diags); + + log("Building AST..."); + AST = ParsedAST::build(File, Inputs, std::move(Invocation), + /*InvocationDiags=*/std::vector{}, Preamble); + if (!AST) { + elog("Failed to build AST"); + return false; + } + ErrCount += showErrors(llvm::makeArrayRef(AST->getDiagnostics()) + .drop_front(Preamble->Diags.size())); + + if (Opts.BuildDynamicSymbolIndex) { + log("Indexing AST..."); + Index.updateMain(File, *AST); + } + return true; + } + + // Run AST-based features at each token in the file. + void testLocationFeatures() { + log("Testing features at each token (may be slow in large files)"); + auto SpelledTokens = + AST->getTokens().spelledTokens(AST->getSourceManager().getMainFileID()); + for (const auto &Tok : SpelledTokens) { + unsigned Start = AST->getSourceManager().getFileOffset(Tok.location()); + unsigned End = Start + Tok.length(); + Position Pos = offsetToPosition(Inputs.Contents, Start); + // FIXME: dumping the tokens may leak sensitive code into bug reports. + // Add an option to turn this off, once we decide how options work. + vlog(" {0} {1}", Pos, Tok.text(AST->getSourceManager())); + auto Tree = SelectionTree::createRight(AST->getASTContext(), + AST->getTokens(), Start, End); + Tweak::Selection Selection(&Index, *AST, Start, End, std::move(Tree)); + for (const auto &T : prepareTweaks(Selection, Opts.TweakFilter)) { + auto Result = T->apply(Selection); + if (!Result) { + elog(" tweak: {0} ==> FAIL: {1}", T->id(), Result.takeError()); + ++ErrCount; + } else { + vlog(" tweak: {0}", T->id()); + } + } + unsigned Definitions = locateSymbolAt(*AST, Pos, &Index).size(); + vlog(" definition: {0}", Definitions); + + auto Hover = getHover(*AST, Pos, Style, &Index); + vlog(" hover: {0}", Hover.hasValue()); + + // FIXME: it'd be nice to include code completion, but it's too slow. + // Maybe in combination with a line restriction? + } + } +}; + +} // namespace + +bool check(llvm::StringRef File, const ThreadsafeFS &TFS, + const ClangdLSPServer::Options &Opts) { + llvm::SmallString<0> FakeFile; + llvm::Optional Contents; + if (File.empty()) { + llvm::sys::path::system_temp_directory(false, FakeFile); + llvm::sys::path::append(FakeFile, "test.cc"); + File = FakeFile; + Contents = R"cpp( + #include + #include + + size_t N = 50; + auto xxx = std::string(N, 'x'); + )cpp"; + } + log("Testing on source file {0}", File); + + Checker C(File, Opts); + if (!C.buildCommand() || !C.buildInvocation(TFS, Contents) || !C.buildAST()) + return false; + C.testLocationFeatures(); + + log("All checks completed, {0} errors", C.ErrCount); + return C.ErrCount == 0; +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index a897a9a3531d2..98daaf9573597 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -47,6 +47,11 @@ namespace clang { namespace clangd { + +// Implemented in Check.cpp. +bool check(const llvm::StringRef File, const ThreadsafeFS &TFS, + const ClangdLSPServer::Options &Opts); + namespace { using llvm::cl::cat; @@ -57,6 +62,7 @@ using llvm::cl::init; using llvm::cl::list; using llvm::cl::opt; using llvm::cl::OptionCategory; +using llvm::cl::ValueOptional; using llvm::cl::values; // All flags must be placed in a category, or they will be shown neither in @@ -354,6 +360,16 @@ opt Test{ Hidden, }; +opt CheckFile{ + "check", + cat(Misc), + desc("Parse one file in isolation instead of acting as a language server. " + "Useful to investigate/reproduce crashes or configuration problems. " + "With --check=, attempts to parse a particular file."), + init(""), + ValueOptional, +}; + enum PCHStorageFlag { Disk, Memory }; opt PCHStorage{ "pch-storage", @@ -541,7 +557,8 @@ const char TestScheme::TestDir[] = "/clangd-test"; enum class ErrorResultCode : int { NoShutdownRequest = 1, - CantRunAsXPCService = 2 + CantRunAsXPCService = 2, + CheckFailed = 3 }; int main(int argc, char *argv[]) { @@ -646,7 +663,8 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // If a user ran `clangd` in a terminal without redirecting anything, // it's somewhat likely they're confused about how to use clangd. // Show them the help overview, which explains. - if (llvm::outs().is_displayed() && llvm::errs().is_displayed()) + if (llvm::outs().is_displayed() && llvm::errs().is_displayed() && + !CheckFile.getNumOccurrences()) llvm::errs() << Overview << "\n"; // Use buffered stream to stderr (we still flush each log message). Unbuffered // stream can cause significant (non-deterministic) latency for the logger. @@ -825,6 +843,15 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // Shall we allow to customize the file limit? Opts.Rename.AllowCrossFile = CrossFileRename; + if (CheckFile.getNumOccurrences()) { + llvm::SmallString<256> Path; + llvm::sys::fs::real_path(CheckFile, Path, /*expand_tilde=*/true); + log("Entering check mode (no LSP server)"); + return check(Path, TFS, Opts) + ? 0 + : static_cast(ErrorResultCode::CheckFailed); + } + // Initialize and run ClangdLSPServer. // Change stdin to binary to not lose \r\n on windows. llvm::sys::ChangeStdinToBinary(); @@ -835,7 +862,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var TransportLayer = newXPCTransport(); #else llvm::errs() << "This clangd binary wasn't built with XPC support.\n"; - return (int)ErrorResultCode::CantRunAsXPCService; + return static_cast(ErrorResultCode::CantRunAsXPCService); #endif } else { log("Starting LSP over stdin/stdout"); From 5011d43108d1de30a056d66e73fa19062e0e84b7 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 28 Sep 2020 09:33:11 +0000 Subject: [PATCH 273/544] Migrate Declarators to use the List API After this change all nodes that have a delimited-list are using the `List` API. Implementation details: Let's look at a declaration with multiple declarators: `int a, b;` To generate a declarator list node we need to have the range of declarators: `a, b`: However, the `ClangAST` actually stores them as separate declarations: `int a ;` `int b;` We solve that by appropriately marking the declarators on each separate declaration in the `ClangAST` and then for the final declarator `int b`, shrinking its range to fit to the already marked declarators. Differential Revision: https://reviews.llvm.org/D88403 --- clang/include/clang/Tooling/Syntax/Nodes.h | 18 +- clang/lib/Tooling/Syntax/BuildTree.cpp | 69 +- clang/lib/Tooling/Syntax/Nodes.cpp | 27 + clang/lib/Tooling/Syntax/Synthesis.cpp | 2 + .../Tooling/Syntax/BuildTreeTest.cpp | 1844 +++++++++-------- .../Tooling/Syntax/SynthesisTest.cpp | 21 +- 6 files changed, 1108 insertions(+), 873 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index 8b393c5423b4d..ed4449adb0f06 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -99,10 +99,14 @@ enum class NodeKind : uint16_t { ParametersAndQualifiers, MemberPointer, UnqualifiedId, + + // Lists + DeclaratorList, ParameterDeclarationList, CallArguments, - // Nested Name Specifiers. NestedNameSpecifier, + + // Name Specifiers. GlobalNameSpecifier, DecltypeNameSpecifier, IdentifierNameSpecifier, @@ -179,6 +183,7 @@ enum class NodeRole : uint8_t { Member, Callee, Arguments, + Declarators }; /// For debugging purposes. raw_ostream &operator<<(raw_ostream &OS, NodeRole R); @@ -823,6 +828,17 @@ class LinkageSpecificationDeclaration final : public Declaration { } }; +class DeclaratorList final : public List { +public: + DeclaratorList() : List(NodeKind::DeclaratorList) {} + static bool classof(const Node *N) { + return N->getKind() == NodeKind::DeclaratorList; + } + std::vector getDeclarators(); + std::vector> + getDeclaratorsAndCommas(); +}; + /// Groups multiple declarators (e.g. variables, typedefs, etc.) together. All /// grouped declarators share the same declaration specifiers (e.g. 'int' or /// 'typedef'). diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 4d365090abf1e..e1ed55f2e4eb6 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -397,6 +397,17 @@ class syntax::TreeBuilder { Mapping.add(From, New); } + /// Populate children for \p New list, assuming it covers tokens from a + /// subrange of \p SuperRange. + void foldList(ArrayRef SuperRange, syntax::List *New, + ASTPtr From) { + assert(New); + auto ListRange = Pending.shrinkToFitList(SuperRange); + Pending.foldChildren(Arena, ListRange, New); + if (From) + Mapping.add(From, New); + } + /// Notifies that we should not consume trailing semicolon when computing /// token range of \p D. void noticeDeclWithoutSemicolon(Decl *D); @@ -579,6 +590,35 @@ class syntax::TreeBuilder { It->second->setRole(Role); } + /// Shrink \p Range to a subrange that only contains tokens of a list. + /// List elements and delimiters should already have correct roles. + ArrayRef shrinkToFitList(ArrayRef Range) { + auto BeginChildren = Trees.lower_bound(Range.begin()); + assert((BeginChildren == Trees.end() || + BeginChildren->first == Range.begin()) && + "Range crosses boundaries of existing subtrees"); + + auto EndChildren = Trees.lower_bound(Range.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Range.end()) && + "Range crosses boundaries of existing subtrees"); + + auto BelongsToList = [](decltype(Trees)::value_type KV) { + auto Role = KV.second->getRole(); + return Role == syntax::NodeRole::ListElement || + Role == syntax::NodeRole::ListDelimiter; + }; + + auto BeginListChildren = + std::find_if(BeginChildren, EndChildren, BelongsToList); + + auto EndListChildren = + std::find_if_not(BeginListChildren, EndChildren, BelongsToList); + + return ArrayRef(BeginListChildren->first, + EndListChildren->first); + } + /// Add \p Node to the forest and attach child nodes based on \p Tokens. void foldChildren(const syntax::Arena &A, ArrayRef Tokens, syntax::Tree *Node) { @@ -1513,14 +1553,31 @@ class BuildTreeVisitor : public RecursiveASTVisitor { // There doesn't have to be a declarator (e.g. `void foo(int)` only has // declaration, but no declarator). - if (Range.getBegin().isValid()) { - auto *N = new (allocator()) syntax::SimpleDeclarator; - Builder.foldNode(Builder.getRange(Range), N, nullptr); - Builder.markChild(N, syntax::NodeRole::Declarator); + if (!Range.getBegin().isValid()) { + Builder.markChild(new (allocator()) syntax::DeclaratorList, + syntax::NodeRole::Declarators); + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::SimpleDeclaration, D); + return true; } - if (Builder.isResponsibleForCreatingDeclaration(D)) { - Builder.foldNode(Builder.getDeclarationRange(D), + auto *N = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(Range), N, nullptr); + Builder.markChild(N, syntax::NodeRole::ListElement); + + if (!Builder.isResponsibleForCreatingDeclaration(D)) { + // If this is not the last declarator in the declaration we expect a + // delimiter after it. + const auto *DelimiterToken = std::next(Builder.findToken(Range.getEnd())); + if (DelimiterToken->kind() == clang::tok::TokenKind::comma) + Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); + } else { + auto *DL = new (allocator()) syntax::DeclaratorList; + auto DeclarationRange = Builder.getDeclarationRange(D); + Builder.foldList(DeclarationRange, DL, nullptr); + + Builder.markChild(DL, syntax::NodeRole::Declarators); + Builder.foldNode(DeclarationRange, new (allocator()) syntax::SimpleDeclaration, D); } return true; diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 24b7a85963829..b4d1cfd75ac56 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -136,6 +136,8 @@ raw_ostream &syntax::operator<<(raw_ostream &OS, NodeKind K) { return OS << "CallArguments"; case NodeKind::ParameterDeclarationList: return OS << "ParameterDeclarationList"; + case NodeKind::DeclaratorList: + return OS << "DeclaratorList"; } llvm_unreachable("unknown node kind"); } @@ -218,6 +220,8 @@ raw_ostream &syntax::operator<<(raw_ostream &OS, NodeRole R) { return OS << "Callee"; case syntax::NodeRole::Arguments: return OS << "Arguments"; + case syntax::NodeRole::Declarators: + return OS << "Declarators"; } llvm_unreachable("invalid role"); } @@ -291,6 +295,29 @@ syntax::ParameterDeclarationList::getParametersAndCommas() { return Children; } +std::vector +syntax::DeclaratorList::getDeclarators() { + auto DeclaratorsAsNodes = getElementsAsNodes(); + std::vector Children; + for (const auto &DeclaratorAsNode : DeclaratorsAsNodes) { + Children.push_back(llvm::cast(DeclaratorAsNode)); + } + return Children; +} + +std::vector> +syntax::DeclaratorList::getDeclaratorsAndCommas() { + auto DeclaratorsAsNodesAndCommas = getElementsAsNodesAndDelimiters(); + std::vector> + Children; + for (const auto &DeclaratorAsNodeAndComma : DeclaratorsAsNodesAndCommas) { + Children.push_back( + {llvm::cast(DeclaratorAsNodeAndComma.element), + DeclaratorAsNodeAndComma.delimiter}); + } + return Children; +} + syntax::Expression *syntax::MemberExpression::getObject() { return cast_or_null(findChild(syntax::NodeRole::Object)); } diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp index e197c8d35bde4..73452b709de9f 100644 --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -183,6 +183,8 @@ syntax::Tree *allocateTree(syntax::Arena &A, syntax::NodeKind Kind) { return new (A.getAllocator()) syntax::CallArguments; case syntax::NodeKind::ParameterDeclarationList: return new (A.getAllocator()) syntax::ParameterDeclarationList; + case syntax::NodeKind::DeclaratorList: + return new (A.getAllocator()) syntax::DeclaratorList; } llvm_unreachable("unknown node kind"); } diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index ecb4a7ce73a50..6066aba4f716c 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -92,21 +92,23 @@ void foo() {} TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'main' -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'main' +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | `-')' CloseParen | `-CompoundStatement | |-'{' OpenParen | `-'}' CloseParen `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'foo' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'foo' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen `-'}' CloseParen @@ -123,16 +125,18 @@ int b = 42; TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | `-'a' +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-'a' | `-';' `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'b' - | |-'=' - | `-IntegerLiteralExpression - | `-'42' LiteralToken + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'b' + | |-'=' + | `-IntegerLiteralExpression + | `-'42' LiteralToken `-';' )txt")); } @@ -146,21 +150,24 @@ void foo(int a, int b) {} TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'foo' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | |-'int' - | | | `-SimpleDeclarator Declarator - | | | `-'a' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | |-'int' - | | `-SimpleDeclarator Declarator - | | `-'b' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'foo' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | |-'int' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | `-'a' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | |-'int' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | `-'b' + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen `-'}' CloseParen @@ -178,8 +185,9 @@ TranslationUnit Detached `-SimpleDeclaration |-'in\ t' - |-SimpleDeclarator Declarator - | `-'a' + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | `-'a' `-';' )txt")); } @@ -264,8 +272,9 @@ RangeBasedForStatement Statement |-'(' |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | `-'x' +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-'x' | `-':' |-IdExpression | `-UnqualifiedId UnqualifiedId @@ -287,11 +296,12 @@ void test() { DeclarationStatement Statement |-SimpleDeclaration | |-'int' -| `-SimpleDeclarator Declarator -| |-'a' -| |-'=' -| `-IntegerLiteralExpression -| `-'10' LiteralToken +| `-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'a' +| |-'=' +| `-IntegerLiteralExpression +| `-'10' LiteralToken `-';' )txt"})); } @@ -391,11 +401,12 @@ void test() { TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen |-ExpressionStatement Statement @@ -642,8 +653,9 @@ SimpleDeclaration | | `-'n' | `-'::' ListDelimiter |-'S' -`-SimpleDeclarator Declarator - `-'s1' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-'s1' )txt", R"txt( SimpleDeclaration @@ -652,8 +664,9 @@ SimpleDeclaration | | `-'n' | `-'::' ListDelimiter |-'S' -`-SimpleDeclarator Declarator - `-'s2' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-'s2' )txt"})); } @@ -684,8 +697,9 @@ SimpleDeclaration | | `-'>' | `-'::' ListDelimiter |-'S' -`-SimpleDeclarator Declarator - `-'s1' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-'s1' )txt", R"txt( SimpleDeclaration @@ -698,8 +712,9 @@ SimpleDeclaration | | `-'>' | `-'::' ListDelimiter |-'S' -`-SimpleDeclarator Declarator - `-'s2' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-'s2' )txt"})); } @@ -1363,11 +1378,12 @@ TEST_P(BuildSyntaxTreeTest, StringLiteral_Raw) { "TranslationUnit Detached\n" "`-SimpleDeclaration\n" " |-'void'\n" - " |-SimpleDeclarator Declarator\n" - " | |-'test'\n" - " | `-ParametersAndQualifiers\n" - " | |-'(' OpenParen\n" - " | `-')' CloseParen\n" + " |-DeclaratorList Declarators\n" + " | `-SimpleDeclarator ListElement\n" + " | |-'test'\n" + " | `-ParametersAndQualifiers\n" + " | |-'(' OpenParen\n" + " | `-')' CloseParen\n" " `-CompoundStatement\n" " |-'{' OpenParen\n" " |-ExpressionStatement Statement\n" @@ -2875,21 +2891,23 @@ int *c, d; TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'*' -| | `-'a' -| |-',' -| |-SimpleDeclarator Declarator -| | `-'b' +| |-DeclaratorList Declarators +| | |-SimpleDeclarator ListElement +| | | |-'*' +| | | `-'a' +| | |-',' ListDelimiter +| | `-SimpleDeclarator ListElement +| | `-'b' | `-';' `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'*' - | `-'c' - |-',' - |-SimpleDeclarator Declarator - | `-'d' + |-DeclaratorList Declarators + | |-SimpleDeclarator ListElement + | | |-'*' + | | `-'c' + | |-',' ListDelimiter + | `-SimpleDeclarator ListElement + | `-'d' `-';' )txt")); } @@ -2904,12 +2922,13 @@ TranslationUnit Detached `-SimpleDeclaration |-'typedef' |-'int' - |-SimpleDeclarator Declarator - | |-'*' - | `-'a' - |-',' - |-SimpleDeclarator Declarator - | `-'b' + |-DeclaratorList Declarators + | |-SimpleDeclarator ListElement + | | |-'*' + | | `-'a' + | |-',' ListDelimiter + | `-SimpleDeclarator ListElement + | `-'b' `-';' )txt")); } @@ -2926,33 +2945,36 @@ void foo() { TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'foo' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'foo' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen |-DeclarationStatement Statement | |-SimpleDeclaration | | |-'int' - | | |-SimpleDeclarator Declarator - | | | |-'*' - | | | `-'a' - | | |-',' - | | `-SimpleDeclarator Declarator - | | `-'b' + | | `-DeclaratorList Declarators + | | |-SimpleDeclarator ListElement + | | | |-'*' + | | | `-'a' + | | |-',' ListDelimiter + | | `-SimpleDeclarator ListElement + | | `-'b' | `-';' |-DeclarationStatement Statement | |-SimpleDeclaration | | |-'typedef' | | |-'int' - | | |-SimpleDeclarator Declarator - | | | |-'*' - | | | `-'ta' - | | |-',' - | | `-SimpleDeclarator Declarator - | | `-'tb' + | | `-DeclaratorList Declarators + | | |-SimpleDeclarator ListElement + | | | |-'*' + | | | `-'ta' + | | |-',' ListDelimiter + | | `-SimpleDeclarator ListElement + | | `-'tb' | `-';' `-'}' CloseParen )txt")); @@ -2979,8 +3001,9 @@ TranslationUnit Detached | |-'*' | `-')' |-')' - |-SimpleDeclarator Declarator - | `-'size_t' + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | `-'size_t' `-';' )txt")); } @@ -3174,9 +3197,10 @@ SimpleDeclaration SimpleDeclaration |-'struct' |-'Y' -|-SimpleDeclarator Declarator -| |-'*' -| `-'y1' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'*' +| `-'y1' `-';' )txt"})); } @@ -3202,9 +3226,10 @@ SimpleDeclaration |-'Y' |-'{' |-'}' -|-SimpleDeclarator Declarator -| |-'*' -| `-'y2' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'*' +| `-'y2' `-';' )txt", R"txt( @@ -3212,9 +3237,10 @@ SimpleDeclaration |-'struct' |-'{' |-'}' -|-SimpleDeclarator Declarator -| |-'*' -| `-'a1' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'*' +| `-'a1' `-';' )txt"})); } @@ -3233,11 +3259,12 @@ struct S { SimpleDeclaration |-'static' |-'void' -|-SimpleDeclarator Declarator -| |-'f' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'f' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| `-')' CloseParen `-CompoundStatement |-'{' OpenParen `-'}' CloseParen @@ -3258,15 +3285,16 @@ struct S { {R"txt( SimpleDeclaration |-'void' -|-SimpleDeclarator Declarator -| |-NestedNameSpecifier -| | |-IdentifierNameSpecifier ListElement -| | | `-'S' -| | `-'::' ListDelimiter -| |-'f' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-NestedNameSpecifier +| | |-IdentifierNameSpecifier ListElement +| | | `-'S' +| | `-'::' ListDelimiter +| |-'f' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| `-')' CloseParen `-CompoundStatement |-'{' OpenParen `-'}' CloseParen @@ -3285,12 +3313,13 @@ struct X { )cpp", {R"txt( SimpleDeclaration -|-SimpleDeclarator Declarator -| |-'operator' -| |-'int' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'operator' +| |-'int' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| `-')' CloseParen `-';' )txt"})); } @@ -3307,16 +3336,17 @@ unsigned operator "" _c(char); TranslationUnit Detached `-SimpleDeclaration |-'unsigned' - |-SimpleDeclarator Declarator - | |-'operator' - | |-'""' - | |-'_c' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | `-SimpleDeclaration ListElement - | | `-'char' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'operator' + | |-'""' + | |-'_c' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | `-SimpleDeclaration ListElement + | | `-'char' + | `-')' CloseParen `-';' )txt")); } @@ -3341,13 +3371,14 @@ TranslationUnit Detached |-'>' `-SimpleDeclaration |-'unsigned' - |-SimpleDeclarator Declarator - | |-'operator' - | |-'""' - | |-'_t' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'operator' + | |-'""' + | |-'_t' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-';' )txt")); } @@ -3365,19 +3396,21 @@ struct X { {R"txt( SimpleDeclaration |-'X' -|-SimpleDeclarator Declarator -| |-'&' -| |-'operator' -| |-'=' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-ParameterDeclarationList Parameters -| | `-SimpleDeclaration ListElement -| | |-'const' -| | |-'X' -| | `-SimpleDeclarator Declarator -| | `-'&' -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'&' +| |-'operator' +| |-'=' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-ParameterDeclarationList Parameters +| | `-SimpleDeclaration ListElement +| | |-'const' +| | |-'X' +| | `-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-'&' +| `-')' CloseParen `-';' )txt"})); } @@ -3397,21 +3430,23 @@ UnknownDeclaration `-SimpleDeclaration |-'friend' |-'X' - |-SimpleDeclarator Declarator - | |-'operator' - | |-'+' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | `-'X' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | |-'const' - | | |-'X' - | | `-SimpleDeclarator Declarator - | | `-'&' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'operator' + | |-'+' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | `-'X' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | |-'const' + | | |-'X' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | `-'&' + | `-')' CloseParen `-';' )txt"})); } @@ -3463,11 +3498,12 @@ TranslationUnit Detached |-'>' `-SimpleDeclaration |-'T' - |-SimpleDeclarator Declarator - | |-'f' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'f' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-';' )txt")); } @@ -3491,11 +3527,12 @@ TranslationUnit Detached |-'>' `-SimpleDeclaration |-'T' - |-SimpleDeclarator Declarator - | |-'var' - | |-'=' - | `-IntegerLiteralExpression - | `-'10' LiteralToken + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'var' + | |-'=' + | `-IntegerLiteralExpression + | `-'10' LiteralToken `-';' )txt")); } @@ -3522,11 +3559,12 @@ TemplateDeclaration Declaration `-SimpleDeclaration |-'static' |-'U' - |-SimpleDeclarator Declarator - | |-'f' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'f' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-';' )txt"})); } @@ -3565,11 +3603,12 @@ TranslationUnit Detached | |-'>' | `-SimpleDeclaration | |-'U' - | |-SimpleDeclarator Declarator - | | |-'foo' - | | `-ParametersAndQualifiers - | | |-'(' OpenParen - | | `-')' CloseParen + | |-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-'foo' + | | `-ParametersAndQualifiers + | | |-'(' OpenParen + | | `-')' CloseParen | `-';' |-'}' `-';' @@ -3617,11 +3656,12 @@ TranslationUnit Detached | | `-SimpleDeclaration | | |-'static' | | |-'U' - | | |-SimpleDeclarator Declarator - | | | |-'f' - | | | `-ParametersAndQualifiers - | | | |-'(' OpenParen - | | | `-')' CloseParen + | | |-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | |-'f' + | | | `-ParametersAndQualifiers + | | | |-'(' OpenParen + | | | `-')' CloseParen | | `-';' | |-'}' | `-';' @@ -3834,8 +3874,9 @@ TranslationUnit Detached | |-'"C"' | `-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | `-'a' +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-'a' | `-';' `-LinkageSpecificationDeclaration |-'extern' @@ -3843,13 +3884,15 @@ TranslationUnit Detached |-'{' |-SimpleDeclaration | |-'int' - | |-SimpleDeclarator Declarator - | | `-'b' + | |-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | `-'b' | `-';' |-SimpleDeclaration | |-'int' - | |-SimpleDeclarator Declarator - | | `-'c' + | |-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | `-'c' | `-';' `-'}' )txt")); @@ -3876,11 +3919,12 @@ void test() { TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen |-CompoundStatement Statement @@ -3913,11 +3957,12 @@ void test() BRACES TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen unmodifiable `-'}' CloseParen unmodifiable @@ -3936,11 +3981,12 @@ void test() { TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen |-IfStatement Statement @@ -3980,11 +4026,12 @@ void test() { TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen |-ExpressionStatement Statement @@ -4018,11 +4065,12 @@ void test() { TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-CompoundStatement |-'{' OpenParen |-IfStatement Statement @@ -4104,11 +4152,12 @@ void test() { {R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - |-'s' - |-'=' - `-IntegerLiteralExpression - `-'1' LiteralToken +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + |-'s' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken )txt"})); } @@ -4133,36 +4182,39 @@ void test(){ {R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - `-UnknownExpression - |-'s0' - |-'{' - `-'}' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-UnknownExpression + |-'s0' + |-'{' + `-'}' )txt", R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - `-UnknownExpression - |-'s1' - |-'{' - |-IntegerLiteralExpression - | `-'1' LiteralToken - `-'}' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-UnknownExpression + |-'s1' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' )txt", R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - `-UnknownExpression - |-'s2' - |-'{' - |-IntegerLiteralExpression - | `-'1' LiteralToken - |-',' - |-FloatingLiteralExpression - | `-'2.' LiteralToken - `-'}' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + `-UnknownExpression + |-'s2' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' LiteralToken + `-'}' )txt"})); } @@ -4187,39 +4239,42 @@ void test() { {R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - |-'s0' - |-'=' - `-UnknownExpression - |-'{' - `-'}' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + |-'s0' + |-'=' + `-UnknownExpression + |-'{' + `-'}' )txt", R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - |-'s1' - |-'=' - `-UnknownExpression - |-'{' - |-IntegerLiteralExpression - | `-'1' LiteralToken - `-'}' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + |-'s1' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' )txt", R"txt( SimpleDeclaration |-'S' -`-SimpleDeclarator Declarator - |-'s2' - |-'=' - `-UnknownExpression - |-'{' - |-IntegerLiteralExpression - | `-'1' LiteralToken - |-',' - |-FloatingLiteralExpression - | `-'2.' LiteralToken - `-'}' +`-DeclaratorList Declarators + `-SimpleDeclarator ListElement + |-'s2' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' LiteralToken + `-'}' )txt"})); } @@ -4240,28 +4295,30 @@ struct S { {R"txt( SimpleDeclaration |-'S' -|-SimpleDeclarator Declarator -| `-UnknownExpression -| |-'s1' -| |-'(' -| |-IntegerLiteralExpression -| | `-'1' LiteralToken -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| `-UnknownExpression +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' `-';' )txt", R"txt( SimpleDeclaration |-'S' -|-SimpleDeclarator Declarator -| `-UnknownExpression -| |-'s2' -| |-'(' -| |-IntegerLiteralExpression -| | `-'1' LiteralToken -| |-',' -| |-FloatingLiteralExpression -| | `-'2.' LiteralToken -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' `-';' )txt"})); } @@ -4283,35 +4340,38 @@ struct S { {R"txt( SimpleDeclaration |-'S' -|-SimpleDeclarator Declarator -| `-'s0' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| `-'s0' `-';' )txt", R"txt( SimpleDeclaration |-'S' -|-SimpleDeclarator Declarator -| `-UnknownExpression -| |-'s1' -| |-'(' -| |-IntegerLiteralExpression -| | `-'1' LiteralToken -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| `-UnknownExpression +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' `-';' )txt", R"txt( SimpleDeclaration |-'S' -|-SimpleDeclarator Declarator -| `-UnknownExpression -| |-'s2' -| |-'(' -| |-IntegerLiteralExpression -| | `-'1' LiteralToken -| |-',' -| |-FloatingLiteralExpression -| | `-'2.' LiteralToken -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' `-';' )txt"})); } @@ -4518,13 +4578,14 @@ int a[10]; TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'a' - | `-ArraySubscript - | |-'[' OpenParen - | |-IntegerLiteralExpression Size - | | `-'10' LiteralToken - | `-']' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'a' + | `-ArraySubscript + | |-'[' OpenParen + | |-IntegerLiteralExpression Size + | | `-'10' LiteralToken + | `-']' CloseParen `-';' )txt")); } @@ -4538,23 +4599,24 @@ int b[1][2][3]; TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'b' - | |-ArraySubscript - | | |-'[' OpenParen - | | |-IntegerLiteralExpression Size - | | | `-'1' LiteralToken - | | `-']' CloseParen - | |-ArraySubscript - | | |-'[' OpenParen - | | |-IntegerLiteralExpression Size - | | | `-'2' LiteralToken - | | `-']' CloseParen - | `-ArraySubscript - | |-'[' OpenParen - | |-IntegerLiteralExpression Size - | | `-'3' LiteralToken - | `-']' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'b' + | |-ArraySubscript + | | |-'[' OpenParen + | | |-IntegerLiteralExpression Size + | | | `-'1' LiteralToken + | | `-']' CloseParen + | |-ArraySubscript + | | |-'[' OpenParen + | | |-IntegerLiteralExpression Size + | | | `-'2' LiteralToken + | | `-']' CloseParen + | `-ArraySubscript + | |-'[' OpenParen + | |-IntegerLiteralExpression Size + | | `-'3' LiteralToken + | `-']' CloseParen `-';' )txt")); } @@ -4568,24 +4630,25 @@ int c[] = {1,2,3}; TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'c' - | |-ArraySubscript - | | |-'[' OpenParen - | | `-']' CloseParen - | |-'=' - | `-UnknownExpression + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'c' + | |-ArraySubscript + | | |-'[' OpenParen + | | `-']' CloseParen + | |-'=' | `-UnknownExpression - | |-'{' - | |-IntegerLiteralExpression - | | `-'1' LiteralToken - | |-',' - | |-IntegerLiteralExpression - | | `-'2' LiteralToken - | |-',' - | |-IntegerLiteralExpression - | | `-'3' LiteralToken - | `-'}' + | `-UnknownExpression + | |-'{' + | |-IntegerLiteralExpression + | | `-'1' LiteralToken + | |-',' + | |-IntegerLiteralExpression + | | `-'2' LiteralToken + | |-',' + | |-IntegerLiteralExpression + | | `-'3' LiteralToken + | `-'}' `-';' )txt")); } @@ -4602,22 +4665,24 @@ void f(int xs[static 10]); TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'f' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | `-SimpleDeclaration ListElement - | | |-'int' - | | `-SimpleDeclarator Declarator - | | |-'xs' - | | `-ArraySubscript - | | |-'[' OpenParen - | | |-'static' - | | |-IntegerLiteralExpression Size - | | | `-'10' LiteralToken - | | `-']' CloseParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'f' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | `-SimpleDeclaration ListElement + | | |-'int' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-'xs' + | | `-ArraySubscript + | | |-'[' OpenParen + | | |-'static' + | | |-IntegerLiteralExpression Size + | | | `-'10' LiteralToken + | | `-']' CloseParen + | `-')' CloseParen `-';' )txt")); } @@ -4631,11 +4696,12 @@ int func(); TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'func' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'func' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | `-')' CloseParen `-';' )txt")); } @@ -4651,48 +4717,55 @@ int func3(int a, float b); TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'func1' -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | |-ParameterDeclarationList Parameters -| | | `-SimpleDeclaration ListElement -| | | |-'int' -| | | `-SimpleDeclarator Declarator -| | | `-'a' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'func1' +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | |-ParameterDeclarationList Parameters +| | | `-SimpleDeclaration ListElement +| | | |-'int' +| | | `-DeclaratorList Declarators +| | | `-SimpleDeclarator ListElement +| | | `-'a' +| | `-')' CloseParen | `-';' |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'func2' -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | |-ParameterDeclarationList Parameters -| | | `-SimpleDeclaration ListElement -| | | |-'int' -| | | `-SimpleDeclarator Declarator -| | | |-'*' -| | | `-'ap' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'func2' +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | |-ParameterDeclarationList Parameters +| | | `-SimpleDeclaration ListElement +| | | |-'int' +| | | `-DeclaratorList Declarators +| | | `-SimpleDeclarator ListElement +| | | |-'*' +| | | `-'ap' +| | `-')' CloseParen | `-';' `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'func3' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | |-'int' - | | | `-SimpleDeclarator Declarator - | | | `-'a' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | |-'float' - | | `-SimpleDeclarator Declarator - | | `-'b' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'func3' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | |-'int' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | `-'a' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | |-'float' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | `-'b' + | `-')' CloseParen `-';' )txt")); } @@ -4708,41 +4781,45 @@ int func3(int, float); TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'func1' -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | |-ParameterDeclarationList Parameters -| | | `-SimpleDeclaration ListElement -| | | `-'int' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'func1' +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | |-ParameterDeclarationList Parameters +| | | `-SimpleDeclaration ListElement +| | | `-'int' +| | `-')' CloseParen | `-';' |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'func2' -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | |-ParameterDeclarationList Parameters -| | | `-SimpleDeclaration ListElement -| | | |-'int' -| | | `-SimpleDeclarator Declarator -| | | `-'*' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'func2' +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | |-ParameterDeclarationList Parameters +| | | `-SimpleDeclaration ListElement +| | | |-'int' +| | | `-DeclaratorList Declarators +| | | `-SimpleDeclarator ListElement +| | | `-'*' +| | `-')' CloseParen | `-';' `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'func3' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | `-'int' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | `-'float' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'func3' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | `-'int' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | `-'float' + | `-')' CloseParen `-';' )txt")); } @@ -4760,11 +4837,12 @@ int func1([[int a = 1]]); ParameterDeclarationList Parameters `-SimpleDeclaration ListElement |-'int' - `-SimpleDeclarator Declarator - |-'a' - |-'=' - `-IntegerLiteralExpression - `-'1' LiteralToken + `-DeclaratorList Declarators + `-SimpleDeclarator ListElement + |-'a' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken )txt"})); } @@ -4781,25 +4859,28 @@ int func2([[int *ap, int a = 1, char c = '2']]); ParameterDeclarationList Parameters |-SimpleDeclaration ListElement | |-'int' -| `-SimpleDeclarator Declarator -| |-'*' -| `-'ap' +| `-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'*' +| `-'ap' |-',' ListDelimiter |-SimpleDeclaration ListElement | |-'int' -| `-SimpleDeclarator Declarator -| |-'a' -| |-'=' -| `-IntegerLiteralExpression -| `-'1' LiteralToken +| `-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'a' +| |-'=' +| `-IntegerLiteralExpression +| `-'1' LiteralToken |-',' ListDelimiter `-SimpleDeclaration ListElement |-'char' - `-SimpleDeclarator Declarator - |-'c' - |-'=' - `-CharacterLiteralExpression - `-''2'' LiteralToken + `-DeclaratorList Declarators + `-SimpleDeclarator ListElement + |-'c' + |-'=' + `-CharacterLiteralExpression + `-''2'' LiteralToken )txt"})); } @@ -4816,18 +4897,19 @@ template {R"txt( SimpleDeclaration |-'void' -|-SimpleDeclarator Declarator -| |-'test' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-ParameterDeclarationList Parameters -| | |-SimpleDeclaration ListElement -| | | `-'T' -| | |-',' ListDelimiter -| | `-SimpleDeclaration ListElement -| | |-'Args' -| | `-'...' -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'test' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-ParameterDeclarationList Parameters +| | |-SimpleDeclaration ListElement +| | | `-'T' +| | |-',' ListDelimiter +| | `-SimpleDeclaration ListElement +| | |-'Args' +| | `-'...' +| `-')' CloseParen `-';' )txt"})); } @@ -4845,22 +4927,25 @@ template {R"txt( SimpleDeclaration |-'void' -|-SimpleDeclarator Declarator -| |-'test' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-ParameterDeclarationList Parameters -| | |-SimpleDeclaration ListElement -| | | |-'T' -| | | `-SimpleDeclarator Declarator -| | | `-'t' -| | |-',' ListDelimiter -| | `-SimpleDeclaration ListElement -| | |-'Args' -| | |-'...' -| | `-SimpleDeclarator Declarator -| | `-'args' -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'test' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-ParameterDeclarationList Parameters +| | |-SimpleDeclaration ListElement +| | | |-'T' +| | | `-DeclaratorList Declarators +| | | `-SimpleDeclarator ListElement +| | | `-'t' +| | |-',' ListDelimiter +| | `-SimpleDeclaration ListElement +| | |-'Args' +| | |-'...' +| | `-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-'args' +| `-')' CloseParen `-';' )txt"})); } @@ -4878,18 +4963,19 @@ void test(int , char ...); TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'test' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | `-'int' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | `-'char' - | |-'...' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'test' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | `-'int' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | `-'char' + | |-'...' + | `-')' CloseParen `-';' )txt")); } @@ -4907,30 +4993,34 @@ int func(const int a, volatile int b, const volatile int c); TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'func' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | |-'const' - | | | |-'int' - | | | `-SimpleDeclarator Declarator - | | | `-'a' - | | |-',' ListDelimiter - | | |-SimpleDeclaration ListElement - | | | |-'volatile' - | | | |-'int' - | | | `-SimpleDeclarator Declarator - | | | `-'b' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | |-'const' - | | |-'volatile' - | | |-'int' - | | `-SimpleDeclarator Declarator - | | `-'c' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'func' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | |-'const' + | | | |-'int' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | `-'a' + | | |-',' ListDelimiter + | | |-SimpleDeclaration ListElement + | | | |-'volatile' + | | | |-'int' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | `-'b' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | |-'const' + | | |-'volatile' + | | |-'int' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | `-'c' + | `-')' CloseParen `-';' )txt")); } @@ -4947,17 +5037,19 @@ int func(int& a); TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'func' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | `-SimpleDeclaration ListElement - | | |-'int' - | | `-SimpleDeclarator Declarator - | | |-'&' - | | `-'a' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'func' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | `-SimpleDeclaration ListElement + | | |-'int' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-'&' + | | `-'a' + | `-')' CloseParen `-';' )txt")); } @@ -4975,17 +5067,19 @@ int func(int&& a); TranslationUnit Detached `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'func' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | `-SimpleDeclaration ListElement - | | |-'int' - | | `-SimpleDeclarator Declarator - | | |-'&&' - | | `-'a' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'func' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | `-SimpleDeclaration ListElement + | | |-'int' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-'&&' + | | `-'a' + | `-')' CloseParen `-';' )txt")); } @@ -5008,11 +5102,12 @@ TranslationUnit Detached |-'{' |-SimpleDeclaration | |-'int' - | |-SimpleDeclarator Declarator - | | |-'a' - | | `-ParametersAndQualifiers - | | |-'(' OpenParen - | | `-')' CloseParen + | |-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-'a' + | | `-ParametersAndQualifiers + | | |-'(' OpenParen + | | `-')' CloseParen | `-';' |-'}' `-';' @@ -5035,35 +5130,38 @@ struct Test { {R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'b' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| `-'const' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'b' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| `-'const' `-';' )txt", R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'c' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| `-'volatile' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'c' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| `-'volatile' `-';' )txt", R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'d' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| |-'const' -| `-'volatile' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'d' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| |-'const' +| `-'volatile' `-';' )txt"})); } @@ -5081,12 +5179,13 @@ struct Test { {R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'e' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| `-'&' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'e' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| `-'&' `-';' )txt"})); } @@ -5104,12 +5203,13 @@ struct Test { {R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'f' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| `-'&&' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'f' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| `-'&&' `-';' )txt"})); } @@ -5126,14 +5226,15 @@ auto foo() -> int; TranslationUnit Detached `-SimpleDeclaration |-'auto' - |-SimpleDeclarator Declarator - | |-'foo' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-')' CloseParen - | `-TrailingReturnType TrailingReturn - | |-'->' ArrowToken - | `-'int' + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'foo' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-')' CloseParen + | `-TrailingReturnType TrailingReturn + | |-'->' ArrowToken + | `-'int' `-';' )txt")); } @@ -5154,58 +5255,62 @@ struct MyException2 {}; {R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'a' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| |-'throw' -| |-'(' -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'a' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| |-'throw' +| |-'(' +| `-')' `-';' )txt", R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'b' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| |-'throw' -| |-'(' -| |-'...' -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'b' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| |-'throw' +| |-'(' +| |-'...' +| `-')' `-';' )txt", R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'c' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| |-'throw' -| |-'(' -| |-'MyException1' -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'c' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| |-'throw' +| |-'(' +| |-'MyException1' +| `-')' `-';' )txt", R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-'d' -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-')' CloseParen -| |-'throw' -| |-'(' -| |-'MyException1' -| |-',' -| |-'MyException2' -| `-')' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-'d' +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-')' CloseParen +| |-'throw' +| |-'(' +| |-'MyException1' +| |-',' +| |-'MyException2' +| `-')' `-';' )txt"})); } @@ -5223,25 +5328,27 @@ int b() noexcept(true); TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'a' -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | |-')' CloseParen -| | `-'noexcept' +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'a' +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | |-')' CloseParen +| | `-'noexcept' | `-';' `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'b' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-')' CloseParen - | |-'noexcept' - | |-'(' - | |-BoolLiteralExpression - | | `-'true' LiteralToken - | `-')' + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'b' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-')' CloseParen + | |-'noexcept' + | |-'(' + | |-BoolLiteralExpression + | | `-'true' LiteralToken + | `-')' `-';' )txt")); } @@ -5258,50 +5365,54 @@ int *(d)(int); TranslationUnit Detached |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | `-ParenDeclarator -| | |-'(' OpenParen -| | |-'a' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-ParenDeclarator +| | |-'(' OpenParen +| | |-'a' +| | `-')' CloseParen | `-';' |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-'*' -| | `-ParenDeclarator -| | |-'(' OpenParen -| | |-'b' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'*' +| | `-ParenDeclarator +| | |-'(' OpenParen +| | |-'b' +| | `-')' CloseParen | `-';' |-SimpleDeclaration | |-'int' -| |-SimpleDeclarator Declarator -| | |-ParenDeclarator -| | | |-'(' OpenParen -| | | |-'*' -| | | |-'c' -| | | `-')' CloseParen -| | `-ParametersAndQualifiers -| | |-'(' OpenParen -| | |-ParameterDeclarationList Parameters -| | | `-SimpleDeclaration ListElement -| | | `-'int' -| | `-')' CloseParen +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-ParenDeclarator +| | | |-'(' OpenParen +| | | |-'*' +| | | |-'c' +| | | `-')' CloseParen +| | `-ParametersAndQualifiers +| | |-'(' OpenParen +| | |-ParameterDeclarationList Parameters +| | | `-SimpleDeclaration ListElement +| | | `-'int' +| | `-')' CloseParen | `-';' `-SimpleDeclaration |-'int' - |-SimpleDeclarator Declarator - | |-'*' - | |-ParenDeclarator - | | |-'(' OpenParen - | | |-'d' - | | `-')' CloseParen - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | `-SimpleDeclaration ListElement - | | `-'int' - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'*' + | |-ParenDeclarator + | | |-'(' OpenParen + | | |-'d' + | | `-')' CloseParen + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | `-SimpleDeclaration ListElement + | | `-'int' + | `-')' CloseParen `-';' )txt")); } @@ -5317,22 +5428,24 @@ TranslationUnit Detached |-SimpleDeclaration | |-'const' | |-'int' -| |-SimpleDeclarator Declarator -| | |-'west' -| | |-'=' -| | `-PrefixUnaryOperatorExpression -| | |-'-' OperatorToken -| | `-IntegerLiteralExpression Operand -| | `-'1' LiteralToken +| |-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | |-'west' +| | |-'=' +| | `-PrefixUnaryOperatorExpression +| | |-'-' OperatorToken +| | `-IntegerLiteralExpression Operand +| | `-'1' LiteralToken | `-';' `-SimpleDeclaration |-'int' |-'const' - |-SimpleDeclarator Declarator - | |-'east' - | |-'=' - | `-IntegerLiteralExpression - | `-'1' LiteralToken + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'east' + | |-'=' + | `-IntegerLiteralExpression + | `-'1' LiteralToken `-';' )txt")); } @@ -5348,11 +5461,12 @@ TranslationUnit Detached |-'const' |-'int' |-'const' - |-SimpleDeclarator Declarator - | |-'universal' - | |-'=' - | `-IntegerLiteralExpression - | `-'0' LiteralToken + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'universal' + | |-'=' + | `-IntegerLiteralExpression + | `-'0' LiteralToken `-';' )txt")); } @@ -5369,12 +5483,13 @@ TranslationUnit Detached |-'const' |-'int' |-'const' - |-SimpleDeclarator Declarator - | |-'*' - | |-'const' - | |-'*' - | |-'volatile' - | `-'b' + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'*' + | |-'const' + | |-'*' + | |-'volatile' + | `-'b' `-';' )txt")); } @@ -5391,30 +5506,31 @@ auto foo() -> auto(*)(int) -> double*; TranslationUnit Detached `-SimpleDeclaration |-'auto' - |-SimpleDeclarator Declarator - | |-'foo' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-')' CloseParen - | `-TrailingReturnType TrailingReturn - | |-'->' ArrowToken - | |-'auto' - | `-SimpleDeclarator Declarator - | |-ParenDeclarator - | | |-'(' OpenParen - | | |-'*' - | | `-')' CloseParen - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | `-SimpleDeclaration ListElement - | | `-'int' - | |-')' CloseParen - | `-TrailingReturnType TrailingReturn - | |-'->' ArrowToken - | |-'double' - | `-SimpleDeclarator Declarator - | `-'*' + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'foo' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-')' CloseParen + | `-TrailingReturnType TrailingReturn + | |-'->' ArrowToken + | |-'auto' + | `-SimpleDeclarator Declarator + | |-ParenDeclarator + | | |-'(' OpenParen + | | |-'*' + | | `-')' CloseParen + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | `-SimpleDeclaration ListElement + | | `-'int' + | |-')' CloseParen + | `-TrailingReturnType TrailingReturn + | |-'->' ArrowToken + | |-'double' + | `-SimpleDeclarator Declarator + | `-'*' `-';' )txt")); } @@ -5432,24 +5548,26 @@ struct X {}; {R"txt( SimpleDeclaration |-'int' -|-SimpleDeclarator Declarator -| |-MemberPointer -| | |-'X' -| | |-'::' -| | `-'*' -| `-'a' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-MemberPointer +| | |-'X' +| | |-'::' +| | `-'*' +| `-'a' `-';' )txt", R"txt( SimpleDeclaration |-'const' |-'int' -|-SimpleDeclarator Declarator -| |-MemberPointer -| | |-'X' -| | |-'::' -| | `-'*' -| `-'b' +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-MemberPointer +| | |-'X' +| | |-'::' +| | `-'*' +| `-'b' `-';' )txt"})); } @@ -5472,70 +5590,75 @@ struct X { {R"txt( SimpleDeclaration |-'void' -|-SimpleDeclarator Declarator -| |-ParenDeclarator -| | |-'(' OpenParen -| | |-MemberPointer -| | | |-'X' -| | | |-'::' -| | | `-'*' -| | |-'xp' -| | `-')' CloseParen -| `-ParametersAndQualifiers -| |-'(' OpenParen -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-ParenDeclarator +| | |-'(' OpenParen +| | |-MemberPointer +| | | |-'X' +| | | |-'::' +| | | `-'*' +| | |-'xp' +| | `-')' CloseParen +| `-ParametersAndQualifiers +| |-'(' OpenParen +| `-')' CloseParen `-';' )txt", R"txt( SimpleDeclaration |-'void' -|-SimpleDeclarator Declarator -| |-ParenDeclarator -| | |-'(' OpenParen -| | |-MemberPointer -| | | |-'X' -| | | |-'::' -| | | `-'*' -| | |-'*' -| | |-'xpp' -| | `-')' CloseParen -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-ParameterDeclarationList Parameters -| | `-SimpleDeclaration ListElement -| | |-'const' -| | |-'int' -| | `-SimpleDeclarator Declarator -| | `-'*' -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-ParenDeclarator +| | |-'(' OpenParen +| | |-MemberPointer +| | | |-'X' +| | | |-'::' +| | | `-'*' +| | |-'*' +| | |-'xpp' +| | `-')' CloseParen +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-ParameterDeclarationList Parameters +| | `-SimpleDeclaration ListElement +| | |-'const' +| | |-'int' +| | `-DeclaratorList Declarators +| | `-SimpleDeclarator ListElement +| | `-'*' +| `-')' CloseParen `-';' )txt", R"txt( SimpleDeclaration |-'void' -|-SimpleDeclarator Declarator -| |-ParenDeclarator -| | |-'(' OpenParen -| | |-'X' -| | |-'::' -| | |-MemberPointer -| | | |-'Y' -| | | |-'::' -| | | `-'*' -| | |-'xyp' -| | `-')' CloseParen -| `-ParametersAndQualifiers -| |-'(' OpenParen -| |-ParameterDeclarationList Parameters -| | |-SimpleDeclaration ListElement -| | | |-'const' -| | | |-'int' -| | | `-SimpleDeclarator Declarator -| | | `-'*' -| | |-',' ListDelimiter -| | `-SimpleDeclaration ListElement -| | `-'char' -| `-')' CloseParen +|-DeclaratorList Declarators +| `-SimpleDeclarator ListElement +| |-ParenDeclarator +| | |-'(' OpenParen +| | |-'X' +| | |-'::' +| | |-MemberPointer +| | | |-'Y' +| | | |-'::' +| | | `-'*' +| | |-'xyp' +| | `-')' CloseParen +| `-ParametersAndQualifiers +| |-'(' OpenParen +| |-ParameterDeclarationList Parameters +| | |-SimpleDeclaration ListElement +| | | |-'const' +| | | |-'int' +| | | `-DeclaratorList Declarators +| | | `-SimpleDeclarator ListElement +| | | `-'*' +| | |-',' ListDelimiter +| | `-SimpleDeclaration ListElement +| | `-'char' +| `-')' CloseParen `-';' )txt"})); } @@ -5549,31 +5672,34 @@ void x(char a, short (*b)(int)); TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'x' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | |-'char' - | | | `-SimpleDeclarator Declarator - | | | `-'a' - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | |-'short' - | | `-SimpleDeclarator Declarator - | | |-ParenDeclarator - | | | |-'(' OpenParen - | | | |-'*' - | | | |-'b' - | | | `-')' CloseParen - | | `-ParametersAndQualifiers - | | |-'(' OpenParen - | | |-ParameterDeclarationList Parameters - | | | `-SimpleDeclaration ListElement - | | | `-'int' - | | `-')' CloseParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'x' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | |-'char' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | `-'a' + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | |-'short' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-ParenDeclarator + | | | |-'(' OpenParen + | | | |-'*' + | | | |-'b' + | | | `-')' CloseParen + | | `-ParametersAndQualifiers + | | |-'(' OpenParen + | | |-ParameterDeclarationList Parameters + | | | `-SimpleDeclaration ListElement + | | | `-'int' + | | `-')' CloseParen + | `-')' CloseParen `-';' )txt")); } @@ -5587,48 +5713,52 @@ void x(char a, short (*b)(int), long (**c)(long long)); TranslationUnit Detached `-SimpleDeclaration |-'void' - |-SimpleDeclarator Declarator - | |-'x' - | `-ParametersAndQualifiers - | |-'(' OpenParen - | |-ParameterDeclarationList Parameters - | | |-SimpleDeclaration ListElement - | | | |-'char' - | | | `-SimpleDeclarator Declarator - | | | `-'a' - | | |-',' ListDelimiter - | | |-SimpleDeclaration ListElement - | | | |-'short' - | | | `-SimpleDeclarator Declarator - | | | |-ParenDeclarator - | | | | |-'(' OpenParen - | | | | |-'*' - | | | | |-'b' - | | | | `-')' CloseParen - | | | `-ParametersAndQualifiers - | | | |-'(' OpenParen - | | | |-ParameterDeclarationList Parameters - | | | | `-SimpleDeclaration ListElement - | | | | `-'int' - | | | `-')' CloseParen - | | |-',' ListDelimiter - | | `-SimpleDeclaration ListElement - | | |-'long' - | | `-SimpleDeclarator Declarator - | | |-ParenDeclarator - | | | |-'(' OpenParen - | | | |-'*' - | | | |-'*' - | | | |-'c' - | | | `-')' CloseParen - | | `-ParametersAndQualifiers - | | |-'(' OpenParen - | | |-ParameterDeclarationList Parameters - | | | `-SimpleDeclaration ListElement - | | | |-'long' - | | | `-'long' - | | `-')' CloseParen - | `-')' CloseParen + |-DeclaratorList Declarators + | `-SimpleDeclarator ListElement + | |-'x' + | `-ParametersAndQualifiers + | |-'(' OpenParen + | |-ParameterDeclarationList Parameters + | | |-SimpleDeclaration ListElement + | | | |-'char' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | `-'a' + | | |-',' ListDelimiter + | | |-SimpleDeclaration ListElement + | | | |-'short' + | | | `-DeclaratorList Declarators + | | | `-SimpleDeclarator ListElement + | | | |-ParenDeclarator + | | | | |-'(' OpenParen + | | | | |-'*' + | | | | |-'b' + | | | | `-')' CloseParen + | | | `-ParametersAndQualifiers + | | | |-'(' OpenParen + | | | |-ParameterDeclarationList Parameters + | | | | `-SimpleDeclaration ListElement + | | | | `-'int' + | | | `-')' CloseParen + | | |-',' ListDelimiter + | | `-SimpleDeclaration ListElement + | | |-'long' + | | `-DeclaratorList Declarators + | | `-SimpleDeclarator ListElement + | | |-ParenDeclarator + | | | |-'(' OpenParen + | | | |-'*' + | | | |-'*' + | | | |-'c' + | | | `-')' CloseParen + | | `-ParametersAndQualifiers + | | |-'(' OpenParen + | | |-ParameterDeclarationList Parameters + | | | `-SimpleDeclaration ListElement + | | | |-'long' + | | | `-'long' + | | `-')' CloseParen + | `-')' CloseParen `-';' )txt")); } diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp index 7f67b4e2e203a..b8c2334d71888 100644 --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -188,8 +188,9 @@ TEST_P(SynthesisTest, DeepCopy_Original) { TranslationUnit Detached synthesized `-SimpleDeclaration synthesized |-'int' synthesized - |-SimpleDeclarator Declarator synthesized - | `-'a' synthesized + |-DeclaratorList Declarators synthesized + | `-SimpleDeclarator ListElement synthesized + | `-'a' synthesized `-';' synthesized )txt")); } @@ -201,8 +202,9 @@ TEST_P(SynthesisTest, DeepCopy_Child) { EXPECT_TRUE(treeDumpEqual(Copy, R"txt( SimpleDeclaration Detached synthesized |-'int' synthesized -|-SimpleDeclarator Declarator synthesized -| `-'a' synthesized +|-DeclaratorList Declarators synthesized +| `-SimpleDeclarator ListElement synthesized +| `-'a' synthesized `-';' synthesized )txt")); } @@ -225,11 +227,12 @@ void test() { TranslationUnit Detached synthesized `-SimpleDeclaration synthesized |-'void' synthesized - |-SimpleDeclarator Declarator synthesized - | |-'test' synthesized - | `-ParametersAndQualifiers synthesized - | |-'(' OpenParen synthesized - | `-')' CloseParen synthesized + |-DeclaratorList Declarators synthesized + | `-SimpleDeclarator ListElement synthesized + | |-'test' synthesized + | `-ParametersAndQualifiers synthesized + | |-'(' OpenParen synthesized + | `-')' CloseParen synthesized `-CompoundStatement synthesized |-'{' OpenParen synthesized |-IfStatement Statement synthesized From 30d07b14a274f075a01d201ad59723ca1a4a9b57 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 1 Oct 2020 16:10:03 +0200 Subject: [PATCH 274/544] Revert "[clangd] clangd --check: standalone diagnosis of common problems" This reverts commit 79fbcbff41734e3d07e6200d33c3e40732dfae6a. The fallback command fails to parse for the test files if there's no compile_commands.json in the tree. --- clang-tools-extra/clangd/test/check-fail.test | 13 - clang-tools-extra/clangd/test/check.test | 13 - clang-tools-extra/clangd/tool/CMakeLists.txt | 1 - clang-tools-extra/clangd/tool/Check.cpp | 258 ------------------ clang-tools-extra/clangd/tool/ClangdMain.cpp | 33 +-- 5 files changed, 3 insertions(+), 315 deletions(-) delete mode 100644 clang-tools-extra/clangd/test/check-fail.test delete mode 100644 clang-tools-extra/clangd/test/check.test delete mode 100644 clang-tools-extra/clangd/tool/Check.cpp diff --git a/clang-tools-extra/clangd/test/check-fail.test b/clang-tools-extra/clangd/test/check-fail.test deleted file mode 100644 index 7462ce5ecf5f3..0000000000000 --- a/clang-tools-extra/clangd/test/check-fail.test +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: not clangd -check=%s 2>&1 | FileCheck -strict-whitespace %s - -// CHECK: Testing on source file {{.*}}check-fail.test -// CHECK: internal (cc1) args are: -cc1 -// CHECK: Building preamble... -// CHECK: [pp_file_not_found] Line {{.*}}: 'missing.h' file not found -// CHECK: Building AST... -// CHECK: Testing features at each token -// CHECK: tweak: ExpandAutoType ==> FAIL -// CHECK: All checks completed, 2 errors - -#include "missing.h" -auto x = []{}; diff --git a/clang-tools-extra/clangd/test/check.test b/clang-tools-extra/clangd/test/check.test deleted file mode 100644 index 832629ce29ef8..0000000000000 --- a/clang-tools-extra/clangd/test/check.test +++ /dev/null @@ -1,13 +0,0 @@ -# RUN: clangd -log=verbose -check 2>&1 | FileCheck -strict-whitespace %s - -CHECK: Testing on source file {{.*}}test.cc -CHECK: internal (cc1) args are: -cc1 -CHECK: Building preamble... -CHECK: Built preamble -CHECK: Building AST... -CHECK: Testing features at each token -CHECK-DAG: hover: false -CHECK-DAG: hover: true -CHECK-DAG: tweak: AddUsing -CHECK: All checks completed, 0 errors - diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index 65e0aa35f2654..670e5a17013ab 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -3,7 +3,6 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) add_clang_tool(clangd ClangdMain.cpp - Check.cpp $ ) diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp deleted file mode 100644 index 14ee0fdec9c91..0000000000000 --- a/clang-tools-extra/clangd/tool/Check.cpp +++ /dev/null @@ -1,258 +0,0 @@ -//===--- Check.cpp - clangd self-diagnostics ------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Many basic problems can occur processing a file in clangd, e.g.: -// - system includes are not found -// - crash when indexing its AST -// clangd --check provides a simplified, isolated way to reproduce these, -// with no editor, LSP, threads, background indexing etc to contend with. -// -// One important use case is gathering information for bug reports. -// Another is reproducing crashes, and checking which setting prevent them. -// -// It simulates opening a file (determining compile command, parsing, indexing) -// and then running features at many locations. -// -// Currently it adds some basic logging of progress and results. -// We should consider extending it to also recognize common symptoms and -// recommend solutions (e.g. standard library installation issues). -// -//===----------------------------------------------------------------------===// - -#include "ClangdLSPServer.h" -#include "CodeComplete.h" -#include "GlobalCompilationDatabase.h" -#include "Hover.h" -#include "ParsedAST.h" -#include "Preamble.h" -#include "SourceCode.h" -#include "XRefs.h" -#include "index/CanonicalIncludes.h" -#include "index/FileIndex.h" -#include "refactor/Tweak.h" -#include "support/ThreadsafeFS.h" -#include "clang/AST/ASTContext.h" -#include "clang/Basic/DiagnosticIDs.h" -#include "clang/Format/Format.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Tooling/CompilationDatabase.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Path.h" - -namespace clang { -namespace clangd { -namespace { - -// Print (and count) the error-level diagnostics (warnings are ignored). -unsigned showErrors(llvm::ArrayRef Diags) { - unsigned ErrCount = 0; - for (const auto &D : Diags) { - if (D.Severity >= DiagnosticsEngine::Error) { - elog("[{0}] Line {1}: {2}", D.Name, D.Range.start.line + 1, D.Message); - ++ErrCount; - } - } - return ErrCount; -} - -// This class is just a linear pipeline whose functions get called in sequence. -// Each exercises part of clangd's logic on our test file and logs results. -// Later steps depend on state built in earlier ones (such as the AST). -// Many steps can fatally fail (return false), then subsequent ones cannot run. -// Nonfatal failures are logged and tracked in ErrCount. -class Checker { - // from constructor - std::string File; - ClangdLSPServer::Options Opts; - // from buildCommand - tooling::CompileCommand Cmd; - // from buildInvocation - ParseInputs Inputs; - std::unique_ptr Invocation; - format::FormatStyle Style; - // from buildAST - std::shared_ptr Preamble; - llvm::Optional AST; - FileIndex Index; - -public: - // Number of non-fatal errors seen. - unsigned ErrCount = 0; - - Checker(llvm::StringRef File, const ClangdLSPServer::Options &Opts) - : File(File), Opts(Opts) {} - - // Read compilation database and choose a compile command for the file. - bool buildCommand() { - log("Loading compilation database..."); - std::unique_ptr BaseCDB = - std::make_unique( - Opts.CompileCommandsDir); - BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), - std::move(BaseCDB)); - auto Mangler = CommandMangler::detect(); - if (Opts.ResourceDir) - Mangler.ResourceDir = *Opts.ResourceDir; - auto CDB = std::make_unique( - BaseCDB.get(), std::vector{}, - tooling::ArgumentsAdjuster(std::move(Mangler))); - - if (auto TrueCmd = CDB->getCompileCommand(File)) { - Cmd = std::move(*TrueCmd); - log("Compile command from CDB is: {0}", llvm::join(Cmd.CommandLine, " ")); - } else { - Cmd = CDB->getFallbackCommand(File); - log("Generic fallback command is: {0}", llvm::join(Cmd.CommandLine, " ")); - } - - return true; - } - - // Prepare inputs and build CompilerInvocation (parsed compile command). - bool buildInvocation(const ThreadsafeFS &TFS, - llvm::Optional Contents) { - StoreDiags CaptureInvocationDiags; - std::vector CC1Args; - Inputs.CompileCommand = Cmd; - Inputs.TFS = &TFS; - if (Contents.hasValue()) { - Inputs.Contents = *Contents; - log("Imaginary source file contents:\n{0}", Inputs.Contents); - } else { - if (auto Contents = TFS.view(llvm::None)->getBufferForFile(File)) { - Inputs.Contents = Contents->get()->getBuffer().str(); - } else { - elog("Couldn't read {0}: {1}", File, Contents.getError().message()); - return false; - } - } - Inputs.Opts.ClangTidyOpts = - Opts.GetClangTidyOptions(*TFS.view(llvm::None), File); - log("Parsing command..."); - Invocation = - buildCompilerInvocation(Inputs, CaptureInvocationDiags, &CC1Args); - auto InvocationDiags = CaptureInvocationDiags.take(); - ErrCount += showErrors(InvocationDiags); - log("internal (cc1) args are: {0}", llvm::join(CC1Args, " ")); - if (!Invocation) { - elog("Failed to parse command line"); - return false; - } - - // FIXME: Check that resource-dir/built-in-headers exist? - - Style = getFormatStyleForFile(File, Inputs.Contents, TFS); - - return true; - } - - // Build preamble and AST, and index them. - bool buildAST() { - log("Building preamble..."); - Preamble = - buildPreamble(File, *Invocation, Inputs, /*StoreInMemory=*/true, - [&](ASTContext &Ctx, std::shared_ptr PP, - const CanonicalIncludes &Includes) { - if (!Opts.BuildDynamicSymbolIndex) - return; - log("Indexing headers..."); - Index.updatePreamble(File, /*Version=*/"null", Ctx, - std::move(PP), Includes); - }); - if (!Preamble) { - elog("Failed to build preamble"); - return false; - } - ErrCount += showErrors(Preamble->Diags); - - log("Building AST..."); - AST = ParsedAST::build(File, Inputs, std::move(Invocation), - /*InvocationDiags=*/std::vector{}, Preamble); - if (!AST) { - elog("Failed to build AST"); - return false; - } - ErrCount += showErrors(llvm::makeArrayRef(AST->getDiagnostics()) - .drop_front(Preamble->Diags.size())); - - if (Opts.BuildDynamicSymbolIndex) { - log("Indexing AST..."); - Index.updateMain(File, *AST); - } - return true; - } - - // Run AST-based features at each token in the file. - void testLocationFeatures() { - log("Testing features at each token (may be slow in large files)"); - auto SpelledTokens = - AST->getTokens().spelledTokens(AST->getSourceManager().getMainFileID()); - for (const auto &Tok : SpelledTokens) { - unsigned Start = AST->getSourceManager().getFileOffset(Tok.location()); - unsigned End = Start + Tok.length(); - Position Pos = offsetToPosition(Inputs.Contents, Start); - // FIXME: dumping the tokens may leak sensitive code into bug reports. - // Add an option to turn this off, once we decide how options work. - vlog(" {0} {1}", Pos, Tok.text(AST->getSourceManager())); - auto Tree = SelectionTree::createRight(AST->getASTContext(), - AST->getTokens(), Start, End); - Tweak::Selection Selection(&Index, *AST, Start, End, std::move(Tree)); - for (const auto &T : prepareTweaks(Selection, Opts.TweakFilter)) { - auto Result = T->apply(Selection); - if (!Result) { - elog(" tweak: {0} ==> FAIL: {1}", T->id(), Result.takeError()); - ++ErrCount; - } else { - vlog(" tweak: {0}", T->id()); - } - } - unsigned Definitions = locateSymbolAt(*AST, Pos, &Index).size(); - vlog(" definition: {0}", Definitions); - - auto Hover = getHover(*AST, Pos, Style, &Index); - vlog(" hover: {0}", Hover.hasValue()); - - // FIXME: it'd be nice to include code completion, but it's too slow. - // Maybe in combination with a line restriction? - } - } -}; - -} // namespace - -bool check(llvm::StringRef File, const ThreadsafeFS &TFS, - const ClangdLSPServer::Options &Opts) { - llvm::SmallString<0> FakeFile; - llvm::Optional Contents; - if (File.empty()) { - llvm::sys::path::system_temp_directory(false, FakeFile); - llvm::sys::path::append(FakeFile, "test.cc"); - File = FakeFile; - Contents = R"cpp( - #include - #include - - size_t N = 50; - auto xxx = std::string(N, 'x'); - )cpp"; - } - log("Testing on source file {0}", File); - - Checker C(File, Opts); - if (!C.buildCommand() || !C.buildInvocation(TFS, Contents) || !C.buildAST()) - return false; - C.testLocationFeatures(); - - log("All checks completed, {0} errors", C.ErrCount); - return C.ErrCount == 0; -} - -} // namespace clangd -} // namespace clang diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 98daaf9573597..a897a9a3531d2 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -47,11 +47,6 @@ namespace clang { namespace clangd { - -// Implemented in Check.cpp. -bool check(const llvm::StringRef File, const ThreadsafeFS &TFS, - const ClangdLSPServer::Options &Opts); - namespace { using llvm::cl::cat; @@ -62,7 +57,6 @@ using llvm::cl::init; using llvm::cl::list; using llvm::cl::opt; using llvm::cl::OptionCategory; -using llvm::cl::ValueOptional; using llvm::cl::values; // All flags must be placed in a category, or they will be shown neither in @@ -360,16 +354,6 @@ opt Test{ Hidden, }; -opt CheckFile{ - "check", - cat(Misc), - desc("Parse one file in isolation instead of acting as a language server. " - "Useful to investigate/reproduce crashes or configuration problems. " - "With --check=, attempts to parse a particular file."), - init(""), - ValueOptional, -}; - enum PCHStorageFlag { Disk, Memory }; opt PCHStorage{ "pch-storage", @@ -557,8 +541,7 @@ const char TestScheme::TestDir[] = "/clangd-test"; enum class ErrorResultCode : int { NoShutdownRequest = 1, - CantRunAsXPCService = 2, - CheckFailed = 3 + CantRunAsXPCService = 2 }; int main(int argc, char *argv[]) { @@ -663,8 +646,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // If a user ran `clangd` in a terminal without redirecting anything, // it's somewhat likely they're confused about how to use clangd. // Show them the help overview, which explains. - if (llvm::outs().is_displayed() && llvm::errs().is_displayed() && - !CheckFile.getNumOccurrences()) + if (llvm::outs().is_displayed() && llvm::errs().is_displayed()) llvm::errs() << Overview << "\n"; // Use buffered stream to stderr (we still flush each log message). Unbuffered // stream can cause significant (non-deterministic) latency for the logger. @@ -843,15 +825,6 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // Shall we allow to customize the file limit? Opts.Rename.AllowCrossFile = CrossFileRename; - if (CheckFile.getNumOccurrences()) { - llvm::SmallString<256> Path; - llvm::sys::fs::real_path(CheckFile, Path, /*expand_tilde=*/true); - log("Entering check mode (no LSP server)"); - return check(Path, TFS, Opts) - ? 0 - : static_cast(ErrorResultCode::CheckFailed); - } - // Initialize and run ClangdLSPServer. // Change stdin to binary to not lose \r\n on windows. llvm::sys::ChangeStdinToBinary(); @@ -862,7 +835,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var TransportLayer = newXPCTransport(); #else llvm::errs() << "This clangd binary wasn't built with XPC support.\n"; - return static_cast(ErrorResultCode::CantRunAsXPCService); + return (int)ErrorResultCode::CantRunAsXPCService; #endif } else { log("Starting LSP over stdin/stdout"); From f6b1323bc680812e04904293854c356530985bcd Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 1 Oct 2020 16:14:31 +0200 Subject: [PATCH 275/544] Reland [clangd] clangd --check: standalone diagnosis of common problems This reverts commit 30d07b14a274f075a01d201ad59723ca1a4a9b57. Test failures have (hopefully) been fixed. --- clang-tools-extra/clangd/test/check-fail.test | 14 + clang-tools-extra/clangd/test/check.test | 13 + clang-tools-extra/clangd/tool/CMakeLists.txt | 1 + clang-tools-extra/clangd/tool/Check.cpp | 258 ++++++++++++++++++ clang-tools-extra/clangd/tool/ClangdMain.cpp | 33 ++- 5 files changed, 316 insertions(+), 3 deletions(-) create mode 100644 clang-tools-extra/clangd/test/check-fail.test create mode 100644 clang-tools-extra/clangd/test/check.test create mode 100644 clang-tools-extra/clangd/tool/Check.cpp diff --git a/clang-tools-extra/clangd/test/check-fail.test b/clang-tools-extra/clangd/test/check-fail.test new file mode 100644 index 0000000000000..0ee777f02cc55 --- /dev/null +++ b/clang-tools-extra/clangd/test/check-fail.test @@ -0,0 +1,14 @@ +// RUN: cp %s %t.cpp +// RUN: not clangd -check=%t.cpp 2>&1 | FileCheck -strict-whitespace %s + +// CHECK: Testing on source file {{.*}}check-fail.test +// CHECK: internal (cc1) args are: -cc1 +// CHECK: Building preamble... +// CHECK: [pp_file_not_found] Line {{.*}}: 'missing.h' file not found +// CHECK: Building AST... +// CHECK: Testing features at each token +// CHECK: tweak: ExpandAutoType ==> FAIL +// CHECK: All checks completed, 2 errors + +#include "missing.h" +auto x = []{}; diff --git a/clang-tools-extra/clangd/test/check.test b/clang-tools-extra/clangd/test/check.test new file mode 100644 index 0000000000000..832629ce29ef8 --- /dev/null +++ b/clang-tools-extra/clangd/test/check.test @@ -0,0 +1,13 @@ +# RUN: clangd -log=verbose -check 2>&1 | FileCheck -strict-whitespace %s + +CHECK: Testing on source file {{.*}}test.cc +CHECK: internal (cc1) args are: -cc1 +CHECK: Building preamble... +CHECK: Built preamble +CHECK: Building AST... +CHECK: Testing features at each token +CHECK-DAG: hover: false +CHECK-DAG: hover: true +CHECK-DAG: tweak: AddUsing +CHECK: All checks completed, 0 errors + diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index 670e5a17013ab..65e0aa35f2654 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -3,6 +3,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) add_clang_tool(clangd ClangdMain.cpp + Check.cpp $ ) diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp new file mode 100644 index 0000000000000..14ee0fdec9c91 --- /dev/null +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -0,0 +1,258 @@ +//===--- Check.cpp - clangd self-diagnostics ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Many basic problems can occur processing a file in clangd, e.g.: +// - system includes are not found +// - crash when indexing its AST +// clangd --check provides a simplified, isolated way to reproduce these, +// with no editor, LSP, threads, background indexing etc to contend with. +// +// One important use case is gathering information for bug reports. +// Another is reproducing crashes, and checking which setting prevent them. +// +// It simulates opening a file (determining compile command, parsing, indexing) +// and then running features at many locations. +// +// Currently it adds some basic logging of progress and results. +// We should consider extending it to also recognize common symptoms and +// recommend solutions (e.g. standard library installation issues). +// +//===----------------------------------------------------------------------===// + +#include "ClangdLSPServer.h" +#include "CodeComplete.h" +#include "GlobalCompilationDatabase.h" +#include "Hover.h" +#include "ParsedAST.h" +#include "Preamble.h" +#include "SourceCode.h" +#include "XRefs.h" +#include "index/CanonicalIncludes.h" +#include "index/FileIndex.h" +#include "refactor/Tweak.h" +#include "support/ThreadsafeFS.h" +#include "clang/AST/ASTContext.h" +#include "clang/Basic/DiagnosticIDs.h" +#include "clang/Format/Format.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Path.h" + +namespace clang { +namespace clangd { +namespace { + +// Print (and count) the error-level diagnostics (warnings are ignored). +unsigned showErrors(llvm::ArrayRef Diags) { + unsigned ErrCount = 0; + for (const auto &D : Diags) { + if (D.Severity >= DiagnosticsEngine::Error) { + elog("[{0}] Line {1}: {2}", D.Name, D.Range.start.line + 1, D.Message); + ++ErrCount; + } + } + return ErrCount; +} + +// This class is just a linear pipeline whose functions get called in sequence. +// Each exercises part of clangd's logic on our test file and logs results. +// Later steps depend on state built in earlier ones (such as the AST). +// Many steps can fatally fail (return false), then subsequent ones cannot run. +// Nonfatal failures are logged and tracked in ErrCount. +class Checker { + // from constructor + std::string File; + ClangdLSPServer::Options Opts; + // from buildCommand + tooling::CompileCommand Cmd; + // from buildInvocation + ParseInputs Inputs; + std::unique_ptr Invocation; + format::FormatStyle Style; + // from buildAST + std::shared_ptr Preamble; + llvm::Optional AST; + FileIndex Index; + +public: + // Number of non-fatal errors seen. + unsigned ErrCount = 0; + + Checker(llvm::StringRef File, const ClangdLSPServer::Options &Opts) + : File(File), Opts(Opts) {} + + // Read compilation database and choose a compile command for the file. + bool buildCommand() { + log("Loading compilation database..."); + std::unique_ptr BaseCDB = + std::make_unique( + Opts.CompileCommandsDir); + BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), + std::move(BaseCDB)); + auto Mangler = CommandMangler::detect(); + if (Opts.ResourceDir) + Mangler.ResourceDir = *Opts.ResourceDir; + auto CDB = std::make_unique( + BaseCDB.get(), std::vector{}, + tooling::ArgumentsAdjuster(std::move(Mangler))); + + if (auto TrueCmd = CDB->getCompileCommand(File)) { + Cmd = std::move(*TrueCmd); + log("Compile command from CDB is: {0}", llvm::join(Cmd.CommandLine, " ")); + } else { + Cmd = CDB->getFallbackCommand(File); + log("Generic fallback command is: {0}", llvm::join(Cmd.CommandLine, " ")); + } + + return true; + } + + // Prepare inputs and build CompilerInvocation (parsed compile command). + bool buildInvocation(const ThreadsafeFS &TFS, + llvm::Optional Contents) { + StoreDiags CaptureInvocationDiags; + std::vector CC1Args; + Inputs.CompileCommand = Cmd; + Inputs.TFS = &TFS; + if (Contents.hasValue()) { + Inputs.Contents = *Contents; + log("Imaginary source file contents:\n{0}", Inputs.Contents); + } else { + if (auto Contents = TFS.view(llvm::None)->getBufferForFile(File)) { + Inputs.Contents = Contents->get()->getBuffer().str(); + } else { + elog("Couldn't read {0}: {1}", File, Contents.getError().message()); + return false; + } + } + Inputs.Opts.ClangTidyOpts = + Opts.GetClangTidyOptions(*TFS.view(llvm::None), File); + log("Parsing command..."); + Invocation = + buildCompilerInvocation(Inputs, CaptureInvocationDiags, &CC1Args); + auto InvocationDiags = CaptureInvocationDiags.take(); + ErrCount += showErrors(InvocationDiags); + log("internal (cc1) args are: {0}", llvm::join(CC1Args, " ")); + if (!Invocation) { + elog("Failed to parse command line"); + return false; + } + + // FIXME: Check that resource-dir/built-in-headers exist? + + Style = getFormatStyleForFile(File, Inputs.Contents, TFS); + + return true; + } + + // Build preamble and AST, and index them. + bool buildAST() { + log("Building preamble..."); + Preamble = + buildPreamble(File, *Invocation, Inputs, /*StoreInMemory=*/true, + [&](ASTContext &Ctx, std::shared_ptr PP, + const CanonicalIncludes &Includes) { + if (!Opts.BuildDynamicSymbolIndex) + return; + log("Indexing headers..."); + Index.updatePreamble(File, /*Version=*/"null", Ctx, + std::move(PP), Includes); + }); + if (!Preamble) { + elog("Failed to build preamble"); + return false; + } + ErrCount += showErrors(Preamble->Diags); + + log("Building AST..."); + AST = ParsedAST::build(File, Inputs, std::move(Invocation), + /*InvocationDiags=*/std::vector{}, Preamble); + if (!AST) { + elog("Failed to build AST"); + return false; + } + ErrCount += showErrors(llvm::makeArrayRef(AST->getDiagnostics()) + .drop_front(Preamble->Diags.size())); + + if (Opts.BuildDynamicSymbolIndex) { + log("Indexing AST..."); + Index.updateMain(File, *AST); + } + return true; + } + + // Run AST-based features at each token in the file. + void testLocationFeatures() { + log("Testing features at each token (may be slow in large files)"); + auto SpelledTokens = + AST->getTokens().spelledTokens(AST->getSourceManager().getMainFileID()); + for (const auto &Tok : SpelledTokens) { + unsigned Start = AST->getSourceManager().getFileOffset(Tok.location()); + unsigned End = Start + Tok.length(); + Position Pos = offsetToPosition(Inputs.Contents, Start); + // FIXME: dumping the tokens may leak sensitive code into bug reports. + // Add an option to turn this off, once we decide how options work. + vlog(" {0} {1}", Pos, Tok.text(AST->getSourceManager())); + auto Tree = SelectionTree::createRight(AST->getASTContext(), + AST->getTokens(), Start, End); + Tweak::Selection Selection(&Index, *AST, Start, End, std::move(Tree)); + for (const auto &T : prepareTweaks(Selection, Opts.TweakFilter)) { + auto Result = T->apply(Selection); + if (!Result) { + elog(" tweak: {0} ==> FAIL: {1}", T->id(), Result.takeError()); + ++ErrCount; + } else { + vlog(" tweak: {0}", T->id()); + } + } + unsigned Definitions = locateSymbolAt(*AST, Pos, &Index).size(); + vlog(" definition: {0}", Definitions); + + auto Hover = getHover(*AST, Pos, Style, &Index); + vlog(" hover: {0}", Hover.hasValue()); + + // FIXME: it'd be nice to include code completion, but it's too slow. + // Maybe in combination with a line restriction? + } + } +}; + +} // namespace + +bool check(llvm::StringRef File, const ThreadsafeFS &TFS, + const ClangdLSPServer::Options &Opts) { + llvm::SmallString<0> FakeFile; + llvm::Optional Contents; + if (File.empty()) { + llvm::sys::path::system_temp_directory(false, FakeFile); + llvm::sys::path::append(FakeFile, "test.cc"); + File = FakeFile; + Contents = R"cpp( + #include + #include + + size_t N = 50; + auto xxx = std::string(N, 'x'); + )cpp"; + } + log("Testing on source file {0}", File); + + Checker C(File, Opts); + if (!C.buildCommand() || !C.buildInvocation(TFS, Contents) || !C.buildAST()) + return false; + C.testLocationFeatures(); + + log("All checks completed, {0} errors", C.ErrCount); + return C.ErrCount == 0; +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index a897a9a3531d2..98daaf9573597 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -47,6 +47,11 @@ namespace clang { namespace clangd { + +// Implemented in Check.cpp. +bool check(const llvm::StringRef File, const ThreadsafeFS &TFS, + const ClangdLSPServer::Options &Opts); + namespace { using llvm::cl::cat; @@ -57,6 +62,7 @@ using llvm::cl::init; using llvm::cl::list; using llvm::cl::opt; using llvm::cl::OptionCategory; +using llvm::cl::ValueOptional; using llvm::cl::values; // All flags must be placed in a category, or they will be shown neither in @@ -354,6 +360,16 @@ opt Test{ Hidden, }; +opt CheckFile{ + "check", + cat(Misc), + desc("Parse one file in isolation instead of acting as a language server. " + "Useful to investigate/reproduce crashes or configuration problems. " + "With --check=, attempts to parse a particular file."), + init(""), + ValueOptional, +}; + enum PCHStorageFlag { Disk, Memory }; opt PCHStorage{ "pch-storage", @@ -541,7 +557,8 @@ const char TestScheme::TestDir[] = "/clangd-test"; enum class ErrorResultCode : int { NoShutdownRequest = 1, - CantRunAsXPCService = 2 + CantRunAsXPCService = 2, + CheckFailed = 3 }; int main(int argc, char *argv[]) { @@ -646,7 +663,8 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // If a user ran `clangd` in a terminal without redirecting anything, // it's somewhat likely they're confused about how to use clangd. // Show them the help overview, which explains. - if (llvm::outs().is_displayed() && llvm::errs().is_displayed()) + if (llvm::outs().is_displayed() && llvm::errs().is_displayed() && + !CheckFile.getNumOccurrences()) llvm::errs() << Overview << "\n"; // Use buffered stream to stderr (we still flush each log message). Unbuffered // stream can cause significant (non-deterministic) latency for the logger. @@ -825,6 +843,15 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // Shall we allow to customize the file limit? Opts.Rename.AllowCrossFile = CrossFileRename; + if (CheckFile.getNumOccurrences()) { + llvm::SmallString<256> Path; + llvm::sys::fs::real_path(CheckFile, Path, /*expand_tilde=*/true); + log("Entering check mode (no LSP server)"); + return check(Path, TFS, Opts) + ? 0 + : static_cast(ErrorResultCode::CheckFailed); + } + // Initialize and run ClangdLSPServer. // Change stdin to binary to not lose \r\n on windows. llvm::sys::ChangeStdinToBinary(); @@ -835,7 +862,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var TransportLayer = newXPCTransport(); #else llvm::errs() << "This clangd binary wasn't built with XPC support.\n"; - return (int)ErrorResultCode::CantRunAsXPCService; + return static_cast(ErrorResultCode::CantRunAsXPCService); #endif } else { log("Starting LSP over stdin/stdout"); From 56d8a37216200a3312db277e4cf054b202b39ace Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 1 Oct 2020 14:18:52 +0000 Subject: [PATCH 276/544] [gn build] Port f6b1323bc68 --- .../gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn index 6d535215156bf..870f1072956e9 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn @@ -25,5 +25,8 @@ executable("clangd") { } include_dirs = [ ".." ] - sources = [ "ClangdMain.cpp" ] + sources = [ + "Check.cpp", + "ClangdMain.cpp", + ] } From e20f4592297ba44644a8ec75b9d182a903cc0df3 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 1 Oct 2020 15:06:27 +0100 Subject: [PATCH 277/544] [AMDGPU] Simplify getNumFlatOffsetBits. NFC. Remove some checks that have already been done in the only caller. --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 9 +-------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 5dd42d1f4a6a3..8915ef8b63cc0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1712,7 +1712,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDLoc DL(N); uint64_t RemainderOffset = COffsetVal; uint64_t ImmField = 0; - const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned); + const unsigned NumBits = TII->getNumFlatOffsetBits(IsSigned); if (IsSigned) { // Use signed division by a power of two to truncate towards 0. int64_t D = 1LL << (NumBits - 1); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 8f67c2a0bbdf8..abf6869fee152 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6893,14 +6893,7 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); } -unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace, - bool Signed) const { - if (!ST.hasFlatInstOffsets()) - return 0; - - if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) - return 0; - +unsigned SIInstrInfo::getNumFlatOffsetBits(bool Signed) const { if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) return Signed ? 12 : 11; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 8149c47504c69..8ce41c4ba8b3a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1016,7 +1016,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return isUInt<12>(Imm); } - unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const; + unsigned getNumFlatOffsetBits(bool Signed) const; /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT /// encoded instruction. If \p Signed, this is for an instruction that From 91b49fc2571c3d0ac33d447629cfb7789a7fde0d Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 1 Oct 2020 10:34:50 -0400 Subject: [PATCH 278/544] [flang][openacc] Fix unparsing of combined construct (bug 47659) This patch fixes the bug report in https://bugs.llvm.org/show_bug.cgi?id=47659 Reviewed By: sameeranjoshi Differential Revision: https://reviews.llvm.org/D88597 --- flang/lib/Parser/unparse.cpp | 4 ++-- flang/test/Parser/acc-unparse.f90 | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 flang/test/Parser/acc-unparse.f90 diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ab94aa2e00c26..b558bee41dbc1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1903,8 +1903,8 @@ class UnparseVisitor { EndOpenACC(); Walk(std::get>(x.t)); BeginOpenACC(); - Walk("!$ACC END ", std::get>(x.t)); - Put("\n"); + Walk("!$ACC END ", std::get>(x.t), + "\n"); EndOpenACC(); } void Unparse(const OpenACCRoutineConstruct &x) { diff --git a/flang/test/Parser/acc-unparse.f90 b/flang/test/Parser/acc-unparse.f90 new file mode 100644 index 0000000000000..cb0edee2b6d4b --- /dev/null +++ b/flang/test/Parser/acc-unparse.f90 @@ -0,0 +1,19 @@ +! RUN: %f18 -fopenacc -funparse %s | FileCheck %s + +! Test unparse does not crash with OpenACC directives. + +! Test bug 47659 +program bug47659 + integer :: i, j + label1: do i = 1, 10 + !$acc parallel loop + do j = 1, 10 + if (j == 2) then + exit label1 + end if + end do + end do label1 +end program + +!CHECK-LABEL: PROGRAM bug47659 +!CHECK: !$ACC PARALLEL LOOP From 79410ddb96d42b72b5a7081fd680a28ae760fd51 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 1 Oct 2020 08:55:40 -0400 Subject: [PATCH 279/544] [libc++][ci] Add a configuration testing Apple's system library build Differential Revision: https://reviews.llvm.org/D88650 --- libcxx/utils/ci/buildkite-pipeline.yml | 10 ++++++++++ libcxx/utils/ci/run-buildbot.sh | 22 +++++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index db9c4c2b9fc83..d9fb0925c6d84 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -74,3 +74,13 @@ steps: command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-singlethreaded | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" + + # Build with the configuration we use to generate libc++.dylib on Apple platforms + - label: "Apple system" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-apple-system | libcxx/utils/ci/phabricator-report" + agents: + queue: "libcxx-macos-builders" + - label: "Apple system -fno-exceptions" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-apple-system-noexceptions | libcxx/utils/ci/phabricator-report" + agents: + queue: "libcxx-macos-builders" diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh index 25cdcc3ee1649..1f4b5df731ff3 100755 --- a/libcxx/utils/ci/run-buildbot.sh +++ b/libcxx/utils/ci/run-buildbot.sh @@ -10,6 +10,8 @@ set -ex BUILDER="${1}" +MONOREPO_ROOT="$(git rev-parse --show-toplevel)" +BUILD_DIR="${MONOREPO_ROOT}/build/${BUILDER}" args=() args+=("-DLLVM_ENABLE_PROJECTS=libcxx;libunwind;libcxxabi") @@ -100,19 +102,29 @@ x86_64-ubuntu-singlethreaded) args+=("-DLIBCXXABI_ENABLE_THREADS=OFF") args+=("-DLIBCXX_ENABLE_MONOTONIC_CLOCK=OFF") ;; +x86_64-apple-system) + export CC=clang + export CXX=clang++ + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake") +;; +x86_64-apple-system-noexceptions) + export CC=clang + export CXX=clang++ + args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") + args+=("-C${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake") + args+=("-DLIBCXX_ENABLE_EXCEPTIONS=OFF") + args+=("-DLIBCXXABI_ENABLE_EXCEPTIONS=OFF") +;; *) echo "${BUILDER} is not a known configuration" exit 1 ;; esac -UMBRELLA_ROOT="$(git rev-parse --show-toplevel)" -LLVM_ROOT="${UMBRELLA_ROOT}/llvm" -BUILD_DIR="${UMBRELLA_ROOT}/build/${BUILDER}" - echo "--- Generating CMake" rm -rf "${BUILD_DIR}" -cmake -S "${LLVM_ROOT}" -B "${BUILD_DIR}" -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo "${args[@]}" +cmake -S "${MONOREPO_ROOT}/llvm" -B "${BUILD_DIR}" -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo "${args[@]}" echo "--- Building libc++ and libc++abi" ninja -C "${BUILD_DIR}" check-cxx-deps cxxabi From 48c9e8244b6b8aeb6a4fd10dcf4c6995f1fec9a0 Mon Sep 17 00:00:00 2001 From: Meera Nakrani Date: Thu, 1 Oct 2020 14:55:01 +0000 Subject: [PATCH 280/544] [ARM] Removed hasSideEffects from signed/unsigned saturates Removed hasSideEffects from SSAT and USAT so that they are no longer marked as unpredictable. Differential Revision: https://reviews.llvm.org/D88545 --- llvm/lib/Target/ARM/ARMInstrThumb2.td | 1 - llvm/test/tools/llvm-mca/ARM/m4-int.s | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 3aea1925a3807..74627b0c1cdcf 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2575,7 +2575,6 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. -let hasSideEffects = 1 in class T2SatI : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> { bits<4> Rd; diff --git a/llvm/test/tools/llvm-mca/ARM/m4-int.s b/llvm/test/tools/llvm-mca/ARM/m4-int.s index b46f731f0793d..c2468efea2051 100644 --- a/llvm/test/tools/llvm-mca/ARM/m4-int.s +++ b/llvm/test/tools/llvm-mca/ARM/m4-int.s @@ -746,9 +746,9 @@ yield # CHECK-NEXT: 1 1 1.00 smulwt r0, r1, r2 # CHECK-NEXT: 1 2 1.00 smusd r0, r1, r2 # CHECK-NEXT: 1 2 1.00 smusdx r0, r1, r2 -# CHECK-NEXT: 1 1 1.00 U ssat r0, #1, r2 -# CHECK-NEXT: 1 1 1.00 U ssat r0, #1, r2, lsl #1 -# CHECK-NEXT: 1 1 1.00 U ssat16 r0, #1, r1 +# CHECK-NEXT: 1 1 1.00 ssat r0, #1, r2 +# CHECK-NEXT: 1 1 1.00 ssat r0, #1, r2, lsl #1 +# CHECK-NEXT: 1 1 1.00 ssat16 r0, #1, r1 # CHECK-NEXT: 1 1 1.00 * * U ssax r0, r1, r2 # CHECK-NEXT: 1 1 1.00 * * U ssbb # CHECK-NEXT: 1 1 1.00 * * U ssub16 r0, r1, r2 @@ -858,9 +858,9 @@ yield # CHECK-NEXT: 1 1 1.00 uqsub8 r0, r1, r2 # CHECK-NEXT: 1 1 1.00 usad8 r0, r1, r2 # CHECK-NEXT: 1 1 1.00 usada8 r0, r1, r2, r3 -# CHECK-NEXT: 1 1 1.00 U usat r0, #1, r1 -# CHECK-NEXT: 1 1 1.00 U usat r0, #1, r1, lsl #1 -# CHECK-NEXT: 1 1 1.00 U usat16 r0, #1, r1 +# CHECK-NEXT: 1 1 1.00 usat r0, #1, r1 +# CHECK-NEXT: 1 1 1.00 usat r0, #1, r1, lsl #1 +# CHECK-NEXT: 1 1 1.00 usat16 r0, #1, r1 # CHECK-NEXT: 1 1 1.00 * * U usax r0, r1, r2 # CHECK-NEXT: 1 1 1.00 * * U usub16 r0, r1, r2 # CHECK-NEXT: 1 1 1.00 * * U usub8 r0, r1, r2 From 8c36eaf0377285acb89c319582d9666e60f42007 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 28 Sep 2020 10:54:16 -0400 Subject: [PATCH 281/544] [clang][opencl][codegen] Remove the insertion of `correctly-rounded-divide-sqrt-fp-math` fn-attr. - `-cl-fp32-correctly-rounded-divide-sqrt` is already handled in a per-instruction manner by annotating the accuracy required. There's no need to add that fn-attr. So far, there's no in-tree backend handling that attr and that OpenCL specific option. - In case that out-of-tree backends are broken, this change could be reverted if those backends could not be fixed. Differential Revision: https://reviews.llvm.org/D88424 --- clang/lib/CodeGen/CGCall.cpp | 5 ----- clang/test/CodeGenOpenCL/amdgpu-attrs.cl | 2 +- clang/test/CodeGenOpenCL/fpmath.cl | 11 ----------- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index ec7ddf8b5d9ee..cb03e025e19e0 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1794,11 +1794,6 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, llvm::utostr(CodeGenOpts.SSPBufferSize)); FuncAttrs.addAttribute("no-signed-zeros-fp-math", llvm::toStringRef(LangOpts.NoSignedZero)); - if (getLangOpts().OpenCL) { - FuncAttrs.addAttribute( - "correctly-rounded-divide-sqrt-fp-math", - llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); - } // TODO: Reciprocal estimate codegen options should apply to instructions? const std::vector &Recips = CodeGenOpts.Reciprocals; diff --git a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl index 13f8b1191c2be..9156c45f4939a 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -190,5 +190,5 @@ kernel void default_kernel() { // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "correctly-rounded-divide-sqrt-fp-math"="false" +// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl index 0108d909c94e6..36cb8e68ea7c3 100644 --- a/clang/test/CodeGenOpenCL/fpmath.cl +++ b/clang/test/CodeGenOpenCL/fpmath.cl @@ -7,7 +7,6 @@ typedef __attribute__(( ext_vector_type(4) )) float float4; float spscalardiv(float a, float b) { // CHECK: @spscalardiv - // CHECK: #[[ATTR:[0-9]+]] // CHECK: fdiv{{.*}}, // NODIVOPT: !fpmath ![[MD:[0-9]+]] // DIVOPT-NOT: !fpmath ![[MD:[0-9]+]] @@ -16,7 +15,6 @@ float spscalardiv(float a, float b) { float4 spvectordiv(float4 a, float4 b) { // CHECK: @spvectordiv - // CHECK: #[[ATTR2:[0-9]+]] // CHECK: fdiv{{.*}}, // NODIVOPT: !fpmath ![[MD]] // DIVOPT-NOT: !fpmath ![[MD]] @@ -38,18 +36,9 @@ void testdbllit(long *val) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable double dpscalardiv(double a, double b) { // CHECK: @dpscalardiv - // CHECK: #[[ATTR]] // CHECK-NOT: !fpmath return a / b; } #endif -// CHECK: attributes #[[ATTR]] = { -// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false" -// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true" -// CHECK-SAME: } -// CHECK: attributes #[[ATTR2]] = { -// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false" -// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true" -// CHECK-SAME: } // NODIVOPT: ![[MD]] = !{float 2.500000e+00} From 29ac9fae54c9cbd819ce400d42dd2e76bf5259ab Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Oct 2020 16:43:59 +0100 Subject: [PATCH 282/544] [InstCombine] collectBitParts - convert to use PatterMatch matchers and avoid IntegerType casts. Make sure we're using getScalarSizeInBits instead of cast to get Type bit widths. This is preliminary cleanup before we can start adding vector support to the bswap/bitreverse (element level) matching. --- llvm/lib/Transforms/Utils/Local.cpp | 102 +++++++++++++--------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 550745673bd9f..0fd0dfa24ce96 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2832,7 +2832,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return I->second; auto &Result = BPS[V] = None; - auto BitWidth = cast(V->getType())->getBitWidth(); + auto BitWidth = V->getType()->getScalarSizeInBits(); // Prevent stack overflow by limiting the recursion depth if (Depth == BitPartRecursionMaxDepth) { @@ -2840,13 +2840,16 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } - if (Instruction *I = dyn_cast(V)) { + if (auto *I = dyn_cast(V)) { + Value *X, *Y; + const APInt *C; + // If this is an or instruction, it may be an inner node of the bswap. - if (I->getOpcode() == Instruction::Or) { - const auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - const auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + if (match(V, m_Or(m_Value(X), m_Value(Y)))) { + const auto &A = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &B = + collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!A || !B) return Result; @@ -2871,15 +2874,15 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, } // If this is a logical shift by a constant, recurse then shift the result. - if (I->isLogicalShift() && isa(I->getOperand(1))) { - const APInt &BitShift = cast(I->getOperand(1))->getValue(); + if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) { + const APInt &BitShift = *C; // Ensure the shift amount is defined. if (BitShift.uge(BitWidth)) return Result; - const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; @@ -2899,9 +2902,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a logical 'and' with a mask that clears bits, recurse then // unset the appropriate bits. - if (I->getOpcode() == Instruction::And && - isa(I->getOperand(1))) { - const APInt &AndMask = cast(I->getOperand(1))->getValue(); + if (match(V, m_And(m_Value(X), m_APInt(C)))) { + const APInt &AndMask = *C; // Check that the mask allows a multiple of 8 bits for a bswap, for an // early exit. @@ -2909,8 +2911,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, if (!MatchBitReversals && (NumMaskedBits % 8) != 0) return Result; - const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; @@ -2923,15 +2925,14 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, } // If this is a zext instruction zero extend the result. - if (I->getOpcode() == Instruction::ZExt) { - const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + if (match(V, m_ZExt(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = BitPart(Res->Provider, BitWidth); - auto NarrowBitWidth = - cast(cast(I)->getSrcTy())->getBitWidth(); + auto NarrowBitWidth = X->getType()->getScalarSizeInBits(); for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx) Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx) @@ -2939,40 +2940,33 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } - // Handle intrinsic calls. - if (auto *II = dyn_cast(I)) { - Intrinsic::ID IntrinsicID = II->getIntrinsicID(); - - // Funnel 'double' shifts take 3 operands, 2 inputs and the shift - // amount (modulo). - // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) - const APInt *Amt; - if ((IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) && - match(II->getArgOperand(2), m_APInt(Amt))) { - - // We can treat fshr as a fshl by flipping the modulo amount. - unsigned ModAmt = Amt->urem(BitWidth); - if (IntrinsicID == Intrinsic::fshr) - ModAmt = BitWidth - ModAmt; - - const auto &LHS = collectBitParts(II->getArgOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - const auto &RHS = collectBitParts(II->getArgOperand(1), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - - // Check we have both sources and they are from the same provider. - if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) - return Result; - - unsigned StartBitRHS = BitWidth - ModAmt; - Result = BitPart(LHS->Provider, BitWidth); - for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) - Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; - for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) - Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift + // amount (modulo). + // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) || + match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) { + // We can treat fshr as a fshl by flipping the modulo amount. + unsigned ModAmt = C->urem(BitWidth); + if (cast(I)->getIntrinsicID() == Intrinsic::fshr) + ModAmt = BitWidth - ModAmt; + + const auto &LHS = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &RHS = + collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + + // Check we have both sources and they are from the same provider. + if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) return Result; - } + + unsigned StartBitRHS = BitWidth - ModAmt; + Result = BitPart(LHS->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) + Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; + for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) + Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; + return Result; } } From 51e74e21aae8b4e885e23d3f15922a58bc173c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 1 Oct 2020 09:23:12 +0300 Subject: [PATCH 283/544] [AArch64] Remove a duplicate call to setHasWinCFI. NFCI. The function already has a cleanup scope that calls the same whenever the function is exited. When reading the code, seeing that this return codepath has an explicit call while other return paths lack it is confusing. In the hypothetical case of a function having a prologue that set the HasWinCFI flag in the MF, but the epilogue containing no WinCFI instructions, the HasWinCFI flag in the MF would end up reset back to false. Differential Revision: https://reviews.llvm.org/D88636 --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 7f4498da317c1..868bb247ed5ec 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1770,8 +1770,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); - - MF.setHasWinCFI(HasWinCFI); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for From f4b9dfd9bc414a316d997a314b05ac7f9258a722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 29 Sep 2020 10:09:22 +0300 Subject: [PATCH 284/544] [AArch64] Don't merge sp decrement into later stores when using WinCFI This matches the corresponding existing case in AArch64LoadStoreOpt::findMatchingUpdateInsnForward. Both cases could also be modified to check MBBI->getFlag(FrameSetup/FrameDestroy) instead of forbidding any optimization involving SP, but the effect is probably pretty much the same. Differential Revision: https://reviews.llvm.org/D88541 --- .../AArch64/AArch64LoadStoreOptimizer.cpp | 20 ++++++++++++++----- .../CodeGen/AArch64/arm64-windows-calls.ll | 5 ++++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e07e724b7b0c4..ad180cb2935ee 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1763,6 +1763,11 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, return false; } +static bool needsWinCFI(const MachineFunction *MF) { + return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF->getFunction().needsUnwindTableEntry(); +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); @@ -1803,14 +1808,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( // the memory access (I) and the increment (MBBI) can access the memory // region defined by [SP, MBBI]. const bool BaseRegSP = BaseReg == AArch64::SP; - if (BaseRegSP) { + if (BaseRegSP && needsWinCFI(I->getMF())) { // FIXME: For now, we always block the optimization over SP in windows // targets as it requires to adjust the unwind/debug info, messing up // the unwind info can actually cause a miscompile. - const MCAsmInfo *MAI = I->getMF()->getTarget().getMCAsmInfo(); - if (MAI->usesWindowsCFI() && - I->getMF()->getFunction().needsUnwindTableEntry()) - return E; + return E; } for (unsigned Count = 0; MBBI != E && Count < Limit; @@ -1866,6 +1868,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( } } + const bool BaseRegSP = BaseReg == AArch64::SP; + if (BaseRegSP && needsWinCFI(I->getMF())) { + // FIXME: For now, we always block the optimization over SP in windows + // targets as it requires to adjust the unwind/debug info, messing up + // the unwind info can actually cause a miscompile. + return E; + } + // Track which register units have been modified and used between the first // insn (inclusive) and the second insn. ModifiedRegUnits.clear(); diff --git a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll index 13a6881748329..bbdc594eca95b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll +++ b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll @@ -26,7 +26,10 @@ define dso_local [2 x i64] @"?f2"() { entry: ; FIXME: Missed optimization, the entire SP push/pop could be removed ; CHECK-LABEL: f2 -; CHECK: stp xzr, xzr, [sp, #-16]! +; CHECK: sub sp, sp, #16 +; CHECK-NEXT: .seh_stackalloc 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: stp xzr, xzr, [sp] ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr ; CHECK-NEXT: .seh_startepilogue From 45698ac0052ae5b1c5beb739636396a5b7263966 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 30 Sep 2020 09:36:37 +0200 Subject: [PATCH 285/544] [clangd] Split DecisionForest Evaluate() into one func per tree. This allows us MSAN to instrument this function. Previous version is not instrumentable due to it shear volume. Differential Revision: https://reviews.llvm.org/D88536 --- .../clangd/quality/CompletionModelCodegen.py | 99 +++++++++++-------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py index 423e5d14cf523..a1f0cb78037ab 100644 --- a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py +++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py @@ -1,7 +1,7 @@ """Code generator for Code Completion Model Inference. Tool runs on the Decision Forest model defined in {model} directory. -It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp +It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp The generated files defines the Example class named {cpp_class} having all the features as class members. The generated runtime provides an `Evaluate` function which can be used to score a code completion candidate. """ @@ -39,34 +39,32 @@ def header_guard(filename): def boost_node(n, label, next_label): - """Returns code snippet for a leaf/boost node. - Adds value of leaf to the score and jumps to the root of the next tree.""" - return "%s: Score += %s; goto %s;" % ( - label, n['score'], next_label) + """Returns code snippet for a leaf/boost node.""" + return "%s: return %s;" % (label, n['score']) def if_greater_node(n, label, next_label): """Returns code snippet for a if_greater node. - Jumps to true_label if the Example feature (NUMBER) is greater than the threshold. - Comparing integers is much faster than comparing floats. Assuming floating points + Jumps to true_label if the Example feature (NUMBER) is greater than the threshold. + Comparing integers is much faster than comparing floats. Assuming floating points are represented as IEEE 754, it order-encodes the floats to integers before comparing them. Control falls through if condition is evaluated to false.""" threshold = n["threshold"] - return "%s: if (E.%s >= %s /*%s*/) goto %s;" % ( - label, n['feature'], order_encode(threshold), threshold, next_label) + return "%s: if (E.get%s() >= %s /*%s*/) goto %s;" % ( + label, n['feature'], order_encode(threshold), threshold, next_label) def if_member_node(n, label, next_label): """Returns code snippet for a if_member node. - Jumps to true_label if the Example feature (ENUM) is present in the set of enum values + Jumps to true_label if the Example feature (ENUM) is present in the set of enum values described in the node. Control falls through if condition is evaluated to false.""" members = '|'.join([ "BIT(%s_type::%s)" % (n['feature'], member) for member in n["set"] ]) - return "%s: if (E.%s & (%s)) goto %s;" % ( - label, n['feature'], members, next_label) + return "%s: if (E.get%s() & (%s)) goto %s;" % ( + label, n['feature'], members, next_label) def node(n, label, next_label): @@ -94,8 +92,6 @@ def tree(t, tree_num, node_num): """ label = "t%d_n%d" % (tree_num, node_num) code = [] - if node_num == 0: - code.append("t%d:" % tree_num) if t["operation"] == "boost": code.append(node(t, label=label, next_label="t%d" % (tree_num + 1))) @@ -119,13 +115,15 @@ def gen_header_code(features_json, cpp_class, filename): """Returns code for header declaring the inference runtime. Declares the Example class named {cpp_class} inside relevant namespaces. - The Example class contains all the features as class members. This + The Example class contains all the features as class members. This class can be used to represent a code completion candidate. Provides `float Evaluate()` function which can be used to score the Example. """ setters = [] + getters = [] for f in features_json: feature = f["name"] + if f["kind"] == "NUMBER": # Floats are order-encoded to integers for faster comparison. setters.append( @@ -138,8 +136,15 @@ class can be used to represent a code completion candidate. raise ValueError("Unhandled feature type.", f["kind"]) # Class members represent all the features of the Example. - class_members = ["uint32_t %s = 0;" % f['name'] for f in features_json] - + class_members = [ + "uint32_t %s = 0;" % f['name'] + for f in features_json + ] + getters = [ + "LLVM_ATTRIBUTE_ALWAYS_INLINE uint32_t get%s() const { return %s; }" + % (f['name'], f['name']) + for f in features_json + ] nline = "\n " guard = header_guard(filename) return """#ifndef %s @@ -150,6 +155,10 @@ class can be used to represent a code completion candidate. %s class %s { public: + // Setters. + %s + + // Getters. %s private: @@ -158,18 +167,16 @@ class %s { // Produces an integer that sorts in the same order as F. // That is: a < b <==> orderEncode(a) < orderEncode(b). static uint32_t OrderEncode(float F); - friend float Evaluate(const %s&); }; -// The function may have large number of lines of code. MSAN -// build times out in such case. -LLVM_NO_SANITIZE("memory") float Evaluate(const %s&); %s #endif // %s -""" % (guard, guard, cpp_class.ns_begin(), cpp_class.name, nline.join(setters), - nline.join(class_members), cpp_class.name, cpp_class.name, - cpp_class.ns_end(), guard) +""" % (guard, guard, cpp_class.ns_begin(), cpp_class.name, + nline.join(setters), + nline.join(getters), + nline.join(class_members), + cpp_class.name, cpp_class.ns_end(), guard) def order_encode(v): @@ -182,21 +189,33 @@ def order_encode(v): def evaluate_func(forest_json, cpp_class): - """Generates code for `float Evaluate(const {Example}&)` function. - The generated function can be used to score an Example.""" - code = "float Evaluate(const %s& E) {\n" % cpp_class.name - lines = [] - lines.append("float Score = 0;") + """Generates evaluation functions for each tree and combines them in + `float Evaluate(const {Example}&)` function. This function can be + used to score an Example.""" + + code = "" + + # Generate evaluation function of each tree. + code += "namespace {\n" tree_num = 0 for tree_json in forest_json: - lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0]) - lines.append("") + code += "LLVM_ATTRIBUTE_NOINLINE float EvaluateTree%d(const %s& E) {\n" % (tree_num, cpp_class.name) + code += " " + \ + "\n ".join( + tree(tree_json, tree_num=tree_num, node_num=0)[0]) + "\n" + code += "}\n\n" tree_num += 1 + code += "} // namespace\n\n" + + # Combine the scores of all trees in the final function. + # MSAN will timeout if these functions are inlined. + code += "float Evaluate(const %s& E) {\n" % cpp_class.name + code += " float Score = 0;\n" + for tree_num in range(len(forest_json)): + code += " Score += EvaluateTree%d(E);\n" % tree_num + code += " return Score;\n" + code += "}\n" - lines.append("t%s: // No such tree." % len(forest_json)) - lines.append("return Score;") - code += " " + "\n ".join(lines) - code += "\n}" return code @@ -218,9 +237,9 @@ def gen_cpp_code(forest_json, features_json, filename, cpp_class): # using-decl for ENUM features. using_decls = "\n".join("using %s_type = %s;" % ( - feature['name'], feature['type']) - for feature in features_json - if feature["kind"] == "ENUM") + feature['name'], feature['type']) + for feature in features_json + if feature["kind"] == "ENUM") nl = "\n" return """%s @@ -287,7 +306,9 @@ def main(): with open(header_file, 'w+t') as output_h: output_h.write(gen_header_code( - features_json=features_json, cpp_class=cpp_class, filename=filename)) + features_json=features_json, + cpp_class=cpp_class, + filename=filename)) if __name__ == '__main__': From cb3fd715f324ff0f58dfeb7d08a88a05477cb0d5 Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Thu, 1 Oct 2020 12:12:34 -0400 Subject: [PATCH 286/544] Reland rG4fcd1a8e6528:[llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking. This is mostly for the benefit of the LBR latency mode. Right now, it performs no checking. If this is run on non-supported hardware, it will produce all zeroes for latency. Differential Revision: https://reviews.llvm.org/D85254 New change: Updated lit.local.cfg to use pass the right argument to llvm-exegesis to actually request the LBR mode. Differential Revision: https://reviews.llvm.org/D88670 --- .../tools/llvm-exegesis/X86/lbr/lit.local.cfg | 8 +-- llvm/tools/llvm-exegesis/lib/Target.h | 5 ++ llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 17 +++++ .../llvm-exegesis/lib/X86/X86Counter.cpp | 65 +++++++++++++++---- llvm/tools/llvm-exegesis/lib/X86/X86Counter.h | 5 ++ llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 15 +---- 6 files changed, 86 insertions(+), 29 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg index 431967c1ec9b0..14d44768458a3 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg @@ -10,7 +10,7 @@ elif not ('x86_64' in config.root.host_triple): config.unsupported = True else: - # We need libpfm to be installed and the host to be at least skylake. + # We need libpfm to be installed and the host to be support LBR format with cycles. llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) if not llvm_exegesis_exe: print('llvm-exegesis not found') @@ -18,14 +18,10 @@ else: else: try: with open(os.devnull, 'w') as quiet: - check_llvm_exegesis_uops_result = subprocess.call( - [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) check_llvm_exegesis_latency_result = subprocess.call( - [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) + [llvm_exegesis_exe, '-mode', 'latency', '-x86-lbr-sample-period', '123', '-repetition-mode', 'loop', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) except OSError: print('could not exec llvm-exegesis') config.unsupported = True - if not check_llvm_exegesis_uops_result == 0: - config.unsupported = True if not check_llvm_exegesis_latency_result == 0: config.unsupported = True diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 70890795426d9..8a5624b42803a 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -142,6 +142,11 @@ class ExegesisTarget { return {&Instr}; } + // Checks hardware and software support for current benchmark mode. + // Returns an error if the target host does not have support to run the + // benchmark. + virtual Error checkFeatureSupport() const { return Error::success(); } + // Creates a snippet generator for the given mode. std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 9f045fa11aa24..270825a8777ba 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -674,6 +674,23 @@ class ExegesisX86Target : public ExegesisTarget { return Arch == Triple::x86_64 || Arch == Triple::x86; } + Error checkFeatureSupport() const override { + // LBR is the only feature we conditionally support now. + // So if LBR is not requested, then we should be able to run the benchmarks. + if (LbrSamplingPeriod == 0) + return Error::success(); + +#if defined(__linux__) && defined(HAVE_LIBPFM) && \ + defined(LIBPFM_HAS_FIELD_CYCLES) + // If the kernel supports it, the hardware still may not have it. + return X86LbrCounter::checkLbrSupport(); +#else + return llvm::make_error( + "LBR not supported on this kernel and/or platform", + llvm::errc::not_supported); +#endif + } + static const unsigned kUnavailableRegisters[4]; }; diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp index 57b493818aaad..25ec4f8586755 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp @@ -21,6 +21,7 @@ #endif // HAVE_LIBPFM #include +#include #include #include #include @@ -35,6 +36,8 @@ namespace llvm { namespace exegesis { +// Number of entries in the LBR. +static constexpr int kLbrEntries = 16; static constexpr size_t kBufferPages = 8; static const size_t kDataBufferSize = kBufferPages * getpagesize(); @@ -70,7 +73,6 @@ static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail, static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize, const void *From, const void *To, llvm::SmallVector *CycleArray) { - assert(From != nullptr && To != nullptr); const char *DataPtr = DataBuf; while (DataPtr < DataBuf + DataSize) { struct perf_event_header Header; @@ -149,21 +151,47 @@ void X86LbrCounter::start() { ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */); } +llvm::Error X86LbrCounter::checkLbrSupport() { + // Do a sample read and check if the results contain non-zero values. + + X86LbrCounter counter(X86LbrPerfEvent(123)); + counter.start(); + + // Prevent the compiler from unrolling the loop and get rid of all the + // branches. We need at least 16 iterations. + int Sum = 0; + int V = 1; + + volatile int *P = &V; + auto TimeLimit = + std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5); + + for (int I = 0; + I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit; + ++I) { + Sum += *P; + } + + counter.stop(); + + auto ResultOrError = counter.doReadCounter(nullptr, nullptr); + if (ResultOrError) + if (!ResultOrError.get().empty()) + // If there is at least one non-zero entry, then LBR is supported. + for (const int64_t &Value : ResultOrError.get()) + if (Value != 0) + return Error::success(); + + return llvm::make_error( + "LBR format with cycles is not suppported on the host.", + llvm::errc::not_supported); +} + llvm::Expected> X86LbrCounter::readOrError(StringRef FunctionBytes) const { - // The max number of time-outs/retries before we give up. - static constexpr int kMaxTimeouts = 160; - // Disable the event before reading ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); - // Parses the LBR buffer and fills CycleArray with the sequence of cycle - // counts from the buffer. - llvm::SmallVector CycleArray; - std::unique_ptr DataBuf(new char[kDataBufferSize]); - int NumTimeouts = 0; - int PollResult = 0; - // Find the boundary of the function so that we could filter the LBRs // to keep only the relevant records. if (FunctionBytes.empty()) @@ -172,6 +200,21 @@ X86LbrCounter::readOrError(StringRef FunctionBytes) const { const void *From = reinterpret_cast(FunctionBytes.data()); const void *To = reinterpret_cast(FunctionBytes.data() + FunctionBytes.size()); + return doReadCounter(From, To); +} + +llvm::Expected> +X86LbrCounter::doReadCounter(const void *From, const void *To) const { + // The max number of time-outs/retries before we give up. + static constexpr int kMaxTimeouts = 160; + + // Parses the LBR buffer and fills CycleArray with the sequence of cycle + // counts from the buffer. + llvm::SmallVector CycleArray; + auto DataBuf = std::make_unique(kDataBufferSize); + int NumTimeouts = 0; + int PollResult = 0; + while (PollResult <= 0) { PollResult = pollLbrPerfEvent(FileDescriptor); if (PollResult > 0) diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h index 94062012917df..73e4dc5b990a0 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h @@ -33,6 +33,8 @@ class X86LbrPerfEvent : public pfm::PerfEvent { class X86LbrCounter : public pfm::Counter { public: + static llvm::Error checkLbrSupport(); + explicit X86LbrCounter(pfm::PerfEvent &&Event); virtual ~X86LbrCounter(); @@ -43,6 +45,9 @@ class X86LbrCounter : public pfm::Counter { readOrError(StringRef FunctionBytes) const override; private: + llvm::Expected> + doReadCounter(const void *From, const void *To) const; + void *MMappedBuffer = nullptr; }; diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index fb3f41e147348..bc2f348a7eaeb 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -160,12 +160,6 @@ static cl::opt cl::desc(""), cl::cat(AnalysisOptions), cl::init("")); -static cl::list - AllowedHostCpus("allowed-host-cpu", - cl::desc("If specified, only run the benchmark if the host " - "CPU matches the names"), - cl::cat(Options), cl::ZeroOrMore); - static cl::opt AnalysisDisplayUnstableOpcodes( "analysis-display-unstable-clusters", cl::desc("if there is more than one benchmark for an opcode, said " @@ -302,12 +296,9 @@ void benchmarkMain() { const LLVMState State(CpuName); - llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU(); - for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end(); - ++Begin) { - if (ActualCpu != *Begin) - ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu)); - } + // Preliminary check to ensure features needed for requested + // benchmark mode are present on target CPU and/or OS. + ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); const std::unique_ptr Runner = ExitOnErr(State.getExegesisTarget().createBenchmarkRunner( From c6ea095b9756dff035aed27e7b5b44bf42d22462 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Wed, 30 Sep 2020 22:43:54 -0700 Subject: [PATCH 287/544] [mlir][Linalg] NFC : Move fusion on tensors to separate file. Differential Revision: https://reviews.llvm.org/D88633 --- .../Dialect/Linalg/Transforms/CMakeLists.txt | 1 + mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 675 ----------------- .../Linalg/Transforms/FusionOnTensors.cpp | 698 ++++++++++++++++++ 3 files changed, 699 insertions(+), 675 deletions(-) create mode 100644 mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index a281aa55a44fb..2b137175d1741 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms DropUnitDims.cpp Fusion.cpp + FusionOnTensors.cpp Hoisting.cpp Interchange.cpp Loops.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 8dadfe63e6596..c964c2466d5c0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -736,687 +736,12 @@ static void fuseLinalgOpsGreedily(FuncOp f) { LLVM_DEBUG(f.print(dbgs() << "\nAfter linalg-fusion: \n")); } -//====---------------------------------------------------------------------===// -// Fusion on Tensor operation. -//====---------------------------------------------------------------------===// - -namespace { - -/// Implementation of fusion of generic ops and indexed_generic ops. -struct FuseGenericOpsOnTensors { - static bool isFusible(LinalgOp producer, LinalgOp consumer, - unsigned consumerIdx) { - // Producer and consumer must have tensor semantics. - if (!producer.hasTensorSemantics() || !consumer.hasTensorSemantics()) - return false; - - // Verify that - // - the producer has all "parallel" iterator type. - if (producer.getNumParallelLoops() != producer.getNumLoops()) - return false; - - // Get the consumer index map. The number of results of the consumer index - // map must match the number of loops of the producer. - AffineMap consumerIndexMap = consumer.getIndexingMap(consumerIdx); - if (consumerIndexMap.getNumResults() != producer.getNumLoops()) - return false; - - // Finally the index_map for the result must be invertible. For now just - // verify it is a permutation. - AffineMap producerResultIndexMap = producer.getOutputIndexingMap(0); - return producerResultIndexMap.isPermutation(); - } - - static LinalgOp fuse(LinalgOp producer, LinalgOp consumer, - unsigned consumerIdx, PatternRewriter &rewriter, - OperationFolder *folder = nullptr) { - if (!isFusible(producer, consumer, consumerIdx)) - return nullptr; - - unsigned numFusedOperands = producer.getOperation()->getNumOperands() + - consumer.getOperation()->getNumOperands() - 1; - - // Compute the fused operands list, - SmallVector fusedOperands; - fusedOperands.reserve(numFusedOperands); - auto consumerOperands = consumer.getOperation()->getOperands(); - auto producerOperands = producer.getOperation()->getOperands(); - fusedOperands.assign(consumerOperands.begin(), - std::next(consumerOperands.begin(), consumerIdx)); - fusedOperands.append(producerOperands.begin(), producerOperands.end()); - fusedOperands.append(std::next(consumerOperands.begin(), consumerIdx + 1), - consumerOperands.end()); - - // Compute indexing_maps for the fused operation. The indexing_maps for the - // operands of the consumers that arent fused are the same. The - // indexing_maps for the producers need to be computed based on the - // indexing_map of the operand at consumerIdx in the consumer. - SmallVector fusedIndexMaps; - auto consumerIndexMaps = consumer.indexing_maps(); - fusedIndexMaps.reserve(fusedOperands.size() + - consumer.getOperation()->getNumResults()); - fusedIndexMaps.assign(consumerIndexMaps.begin(), - std::next(consumerIndexMaps.begin(), consumerIdx)); - // Compute indexing maps for the producer args in the fused operation. - computeProducerOperandIndex( - producer, consumer.getInputIndexingMap(consumerIdx), fusedIndexMaps); - - // Append the indexing maps for the remaining consumer operands. - fusedIndexMaps.append(std::next(consumerIndexMaps.begin(), consumerIdx + 1), - consumerIndexMaps.end()); - - // Generate the fused op. - // Tensor-level fusion is only on ops without initTensors and outputBuffers. - LinalgOp fusedOp; - if (isa(producer.getOperation()) && - isa(consumer.getOperation())) { - fusedOp = - rewriter - .create(consumer.getLoc(), - consumer.getOperation()->getResultTypes(), - /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, - rewriter.getArrayAttr(fusedIndexMaps), - consumer.iterator_types(), - /*doc=*/nullptr, - /*library_call=*/nullptr, - /*symbol_source=*/nullptr) - .getOperation(); - } else { - fusedOp = - rewriter - .create( - consumer.getLoc(), consumer.getOperation()->getResultTypes(), - /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, - rewriter.getArrayAttr(fusedIndexMaps), - consumer.iterator_types(), - /*doc=*/nullptr, - /*library_call=*/nullptr, - /*symbol_source=*/nullptr) - .getOperation(); - } - - // Construct an AffineMap from consumer loops to producer loops. - // consumer loop -> tensor index - AffineMap consumerResultIndexMap = - consumer.getInputIndexingMap(consumerIdx); - // producer loop -> tensor index - AffineMap producerResultIndexMap = producer.getOutputIndexingMap(0); - // tensor index -> producer loop - AffineMap invProducerResultIndexMap = - inversePermutation(producerResultIndexMap); - assert(invProducerResultIndexMap && - "expected producer result indexig map to be invertible"); - // consumer loop -> producer loop - AffineMap consumerToProducerLoopsMap = - invProducerResultIndexMap.compose(consumerResultIndexMap); - - generateFusedRegion(rewriter, fusedOp, producer, consumer, - consumerToProducerLoopsMap, consumerIdx, - consumer.getNumLoops()); - return fusedOp; - } - -private: - /// Append to `fusedOpIndexingMapAttrs` the indexing maps for the operands of - /// the `producer` to use in the fused operation given the indexing map of the - /// result of the producer in the consumer. - static void computeProducerOperandIndex( - LinalgOp producer, AffineMap fusedConsumerArgIndexMap, - SmallVectorImpl &fusedOpIndexingMapAttrs) { - // The indexing map in the consumer op (fusedConsumerArgIndexMap) is a map - // from consumer loop -> consumer arg tensor index/producer result tensor - // index. The fused loop is same as the consumer loop. For each producer arg - // the indexing map to be computed is a map from consumer loop -> producer - // arg tensor index. - - AffineMap producerResultIndexMap = producer.getOutputIndexingMap(0); - // producerResultIndexMap is a map from producer loop -> tensor index. - // Compute the inverse to get map from tensor index -> producer loop. - // The inverse is a map from producer result tensor index -> producer loop. - AffineMap invProducerResultIndexMap = - inversePermutation(producerResultIndexMap); - assert(invProducerResultIndexMap && - "expected producer result indexig map to be invertible"); - for (unsigned argNum : llvm::seq(0, producer.getNumInputs())) { - // argMap is a map from producer loop -> producer arg tensor index. - AffineMap argMap = producer.getInputIndexingMap(argNum); - - // Compose argMap with invProducerResultIndexMap to get a map from - // producer result tensor index -> producer arg tensor index. - AffineMap t1 = argMap.compose(invProducerResultIndexMap); - - // Compose t1 with fusedConsumerArgIndexMap gives an indexing map from - // consumer loop/ fused loop -> producer arg tensor index. - AffineMap indexingMap = t1.compose(fusedConsumerArgIndexMap); - fusedOpIndexingMapAttrs.push_back(AffineMapAttr::get(indexingMap)); - } - } - - /// Generate the region of the fused operation. The region of the fused op - /// must be empty. - static void generateFusedRegion(PatternRewriter &rewriter, Operation *fusedOp, - LinalgOp producer, LinalgOp consumer, - AffineMap consumerToProducerLoopsMap, - unsigned consumerIdx, unsigned nloops) { - // Build the region of the fused op. - Block &producerBlock = producer.getOperation()->getRegion(0).front(); - Block &consumerBlock = consumer.getOperation()->getRegion(0).front(); - Block *fusedBlock = new Block(); - fusedOp->getRegion(0).push_back(fusedBlock); - BlockAndValueMapping mapper; - OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(fusedBlock); - - // The block arguments are - // [index_0, index_1, ... , - // consumer_operand_0, ... , consumer_operand_(`consumerIdx`-1), - // producer_operand_0, ... , producer_operand_(n-1)], - // consumer_operand_(`consumerIdx`), .. consumer_operand_(m-1)] - // , where n is the number of producer's operand and m is the number - // consumer's operand. - // If both `numProducerIndices` and `numConsumerIndices` are zero, this is a - // generic op. In this case, there are no indices in block arguments. - unsigned numProducerIndices = - isa(producer.getOperation()) ? nloops : 0; - unsigned numConsumerIndices = - isa(consumer.getOperation()) ? nloops : 0; - // Firstly, add all the indices to the block arguments. - for (unsigned i = 0, e = std::max(numProducerIndices, numConsumerIndices); - i < e; ++i) - fusedBlock->addArgument(rewriter.getIndexType()); - // Map the arguments for the unmodified args from the consumer. - for (auto consumerArg : llvm::enumerate(consumerBlock.getArguments())) { - if (consumerArg.index() == consumerIdx + numConsumerIndices) { - // Map the arguments for the args from the producer. - for (auto producerArg : llvm::enumerate(producerBlock.getArguments())) { - // If producer is an indexed_generic op, map the indices from consumer - // loop to producer loop (because the fusedOp is built based on - // consumer's perspective). - if (producerArg.index() < numProducerIndices) { - auto newIndex = rewriter.create( - producer.getLoc(), - consumerToProducerLoopsMap.getSubMap(producerArg.index()), - fusedBlock->getArguments().take_front(nloops)); - mapper.map(producerArg.value(), newIndex); - } else { - mapper.map(producerArg.value(), - fusedBlock->addArgument(producerArg.value().getType())); - } - } - continue; - } - - // If consumer is an indexed_generic op, map the indices to the block - // arguments directly. Otherwise, add the same type of arugment and map to - // it. - if (consumerArg.index() < numConsumerIndices) { - mapper.map(consumerArg.value(), - fusedBlock->getArgument(consumerArg.index())); - } else { - mapper.map(consumerArg.value(), - fusedBlock->addArgument(consumerArg.value().getType())); - } - } - - // Add operations from producer (except the yield operation) to the fused - // op. - for (auto &op : producerBlock.getOperations()) { - if (auto yieldOp = dyn_cast(op)) { - // Lookup the value the yield operation is mapped to. - Value yieldVal = yieldOp.getOperand(0); - if (Value clonedVal = mapper.lookupOrNull(yieldVal)) - mapper.map( - consumerBlock.getArgument(consumerIdx + numConsumerIndices), - clonedVal); - continue; - } - rewriter.clone(op, mapper); - } - for (auto &op : consumerBlock.getOperations()) - rewriter.clone(op, mapper); - } -}; -} // namespace - -/// Linearize the expressions in `sourceMap` based on the `reassociationMaps` -/// provided, given the shape of the source tensor that corresponds to the -/// `sourceMap`. Note that this implicitly assumes that the tensors dimensions -/// are "row-major" ordered logically. -/// -/// For example: -/// -/// %0 = op ... : tensor -/// with output index_map `affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>` -/// -/// and reshape: -/// %1 = linalg.tensor_reshape %0 [affine_map<(i, j, k, l) -> (i)>, -/// affine_map<(i, j, k, l) -> (j, k, l)>] : -/// tensor into tensor -/// -/// would be rewritten into: -/// %0 = op ... : tensor -/// with output index_map -/// `affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)>` -static AffineMap linearizeCollapsedDims(AffineMap sourceMap, - ArrayRef sourceShape, - ArrayRef reassociationMaps) { - SmallVector resultExprs; - resultExprs.reserve(reassociationMaps.size()); - ArrayRef sourceExprs = sourceMap.getResults(); - MLIRContext *context = sourceMap.getContext(); - - // Compute the result exprs based on the reassociation maps. - for (AffineMap map : reassociationMaps) { - ArrayRef collapsedDims = map.getResults(); - // Assume that they are in-order and contiguous (already checked in - // verifier). - assert(!collapsedDims.empty()); - unsigned startDim = - collapsedDims.front().cast().getPosition(); - AffineExpr linearizedExpr = makeCanonicalStridedLayoutExpr( - sourceShape.slice(startDim, collapsedDims.size()), - sourceExprs.slice(startDim, collapsedDims.size()), context); - resultExprs.push_back(linearizedExpr); - } - return AffineMap::get(sourceMap.getNumDims(), sourceMap.getNumSymbols(), - resultExprs, context); -} - -/// Checks if the `reshapeOp` can be fused with it consumer (if `asProducer` is -/// true) or its producer (if `asProducer` is false) given the indexing map at -/// its use. -static bool isTensorReshapeOpFusible(TensorReshapeOp reshapeOp, - AffineMap useIndexMap, bool asProducer) { - RankedTensorType returnType = reshapeOp.getResultType(); - RankedTensorType operandType = reshapeOp.getSrcType(); - // Reshape is fusible with its consumer (i.e. reshape as a producer) when its - // operand is of lesser rank than the result. Fusing when operand has higher - // rank will require use of mods and divs in the indexing maps of the fused op - // which would make it non-invertible. Similarly reshape is fused with its - // producer (i.e. reshape as consumer) only if the return type has lesser - // rank. - if ((asProducer && returnType.getRank() < operandType.getRank()) || - (!asProducer && operandType.getRank() < returnType.getRank())) - return false; - return useIndexMap.isIdentity(); -} - -/// Based on the type of `op` create a linalg op of the same type, i.e. if `op` -/// is a linalg.generic operation, the create a `linalg.generic` operation with -/// the given `args`. Expects `op` to be `linalg.generic` or -/// `linalg.indexed_generic`. -template -static LinalgOp createLinalgOpOfSameType(LinalgOp op, PatternRewriter &rewriter, - Args... args) { - if (isa(op.getOperation())) - return cast(rewriter.create(args...).getOperation()); - if (isa(op.getOperation())) - return cast( - rewriter.create(args...).getOperation()); - llvm_unreachable( - "expected only linalg.generic or linalg.indexed_generic ops"); - return nullptr; -} - -namespace { - -/// Implementation of fusion on tensor ops when producer is a TensorReshapeOp. -struct FuseTensorReshapeOpAsProducer { - static bool isFusible(TensorReshapeOp producer, LinalgOp consumer, - unsigned consumerIdx) { - return isa(consumer.getOperation()) && - consumer.hasTensorSemantics() && - isTensorReshapeOpFusible(producer, - consumer.getInputIndexingMap(consumerIdx), - /*asProducer=*/true); - } - - static LinalgOp fuse(TensorReshapeOp producer, LinalgOp consumer, - unsigned consumerIdx, PatternRewriter &rewriter, - OperationFolder *folder = nullptr) { - if (producer.src().getDefiningOp()) - return nullptr; - - if (!isFusible(producer, consumer, consumerIdx)) - return nullptr; - - // Compute the fused operands list, - Operation *consumerOp = consumer.getOperation(); - SmallVector fusedOperands(consumerOp->getOperands()); - fusedOperands[consumerIdx] = producer.src(); - - // Compute indexing_maps for the fused operation. The indexing_maps for the - // operands of the consumers that arent fused are the same. - SmallVector fusedIndexMaps = - llvm::to_vector<4>(llvm::map_range( - consumer.indexing_maps(), [](Attribute attr) -> AffineMap { - return attr.cast().getValue(); - })); - - // Compute the indexing map to use for the operand of the producer. - AffineMap modifiedMap = linearizeCollapsedDims( - fusedIndexMaps[consumerIdx], producer.getResultType().getShape(), - producer.getReassociationMaps()); - for (AffineExpr expr : modifiedMap.getResults()) { - if (!expr.isPureAffine()) - return nullptr; - } - fusedIndexMaps[consumerIdx] = modifiedMap; - - // Further check that the resulting index maps can be fused and - // inverted. Without this the resultant op is not legal. - if (!inversePermutation(concatAffineMaps(fusedIndexMaps))) - return nullptr; - - SmallVector indexMapAttrs = llvm::to_vector<4>( - llvm::map_range(fusedIndexMaps, [](AffineMap map) -> Attribute { - return AffineMapAttr::get(map); - })); - LinalgOp fusedOp = createLinalgOpOfSameType( - consumer, rewriter, rewriter.getUnknownLoc(), - consumerOp->getResultTypes(), - /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, // no init tensors for now. - rewriter.getArrayAttr(indexMapAttrs), consumer.iterator_types(), - /*doc=*/nullptr, - /*library_call=*/nullptr, - /*symbol_source=*/nullptr); - auto &fusedRegion = fusedOp.getOperation()->getRegion(0); - rewriter.cloneRegionBefore(consumerOp->getRegion(0), fusedRegion, - fusedRegion.begin()); - return fusedOp; - } -}; - -/// Implementation of fusion on tensor ops when consumer is a TensorReshapeOp. -struct FuseTensorReshapeOpAsConsumer { - static bool isCollapsingAndFusible(LinalgOp producer, - TensorReshapeOp consumer, - unsigned consumerIdx) { - return isa(producer.getOperation()) && - producer.hasTensorSemantics() && - isTensorReshapeOpFusible(consumer, producer.getOutputIndexingMap(0), - /*asProducer=*/false); - } - - static LinalgOp fuseCollapsingCase(LinalgOp producer, - TensorReshapeOp consumer, - unsigned consumerIdx, - PatternRewriter &rewriter) { - // The indexing_maps for the operands of the fused operation are same as - // those for the operands of the producer. - SmallVector fusedIndexMaps = - llvm::to_vector<4>(llvm::map_range( - producer.indexing_maps(), [](Attribute attr) -> AffineMap { - return attr.cast().getValue(); - })); - // Compute the indexing map to use for the operand of the producer. - AffineMap modifiedMap = linearizeCollapsedDims( - producer.getOutputIndexingMap(0), consumer.getSrcType().getShape(), - consumer.getReassociationMaps()); - for (AffineExpr expr : modifiedMap.getResults()) { - if (!expr.isPureAffine()) - return nullptr; - } - fusedIndexMaps.back() = modifiedMap; - - // Further check that the resulting index maps can be fused and - // inverted. Without this the resultant op is not legal. - if (!inversePermutation(concatAffineMaps(fusedIndexMaps))) - return nullptr; - - SmallVector indexMapAttrs = llvm::to_vector<4>( - llvm::map_range(fusedIndexMaps, [](AffineMap map) -> Attribute { - return AffineMapAttr::get(map); - })); - - Operation *producerOp = producer.getOperation(); - LinalgOp fusedOp = createLinalgOpOfSameType( - producer, rewriter, rewriter.getUnknownLoc(), consumer.getResultType(), - /*inputs=*/producerOp->getOperands(), - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, // no init tensors for now. - rewriter.getArrayAttr(indexMapAttrs), producer.iterator_types(), - /*doc=*/nullptr, - /*library_call=*/nullptr, - /*symbol_source=*/nullptr); - auto &fusedRegion = fusedOp.getOperation()->getRegion(0); - rewriter.cloneRegionBefore(producerOp->getRegion(0), fusedRegion, - fusedRegion.begin()); - return fusedOp; - } - - static bool isExpandingAndFusible(LinalgOp producer, TensorReshapeOp consumer, - unsigned consumerIdx) { - // Is fusible only if: - // 1) The producer is a generic op. - // 2) The producer has tensor semantics. - // 3) The tensor reshape op is a expanding case. - // 4) All the shapes are the same for the generic op. - // 5) All the indexing maps in producer are identity. - // 6) All the loops in producer are parallel loops. - // 7) The producer has a single user. - auto types = producer.getInputOutputShapedTypes(); - assert(!types.empty()); - return isa(producer.getOperation()) && - producer.hasTensorSemantics() && - consumer.getSrcType().getRank() < - consumer.getResultType().getRank() && - std::equal(types.begin() + 1, types.end(), types.begin()) && - llvm::all_of(producer.getIndexingMaps(), - [](AffineMap map) { return map.isIdentity(); }) && - llvm::all_of(producer.iterator_types(), - [](Attribute attr) { - return attr.cast().getValue() == - getParallelIteratorTypeName(); - }) && - producer.getOperation()->hasOneUse(); - } - - static LinalgOp fuseExpandingCase(LinalgOp producer, TensorReshapeOp consumer, - unsigned consumerIdx, - PatternRewriter &rewriter) { - Location loc = producer.getLoc(); - auto dstShape = consumer.getResultType().cast().getShape(); - SmallVector args; - for (auto arg : producer.getOperation()->getOperands()) { - auto type = RankedTensorType::get( - dstShape, arg.getType().cast().getElementType()); - args.push_back(rewriter.createOrFold( - loc, type, arg, consumer.reassociation())); - } - - SmallVector resultTypes; - for (auto t : producer.getOutputTensorTypes()) { - Type type = RankedTensorType::get(dstShape, - t.cast().getElementType()); - resultTypes.push_back(type); - } - - int rank = dstShape.size(); - auto genericOp = rewriter.create( - loc, resultTypes, /*inputs=*/args, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, - SmallVector(args.size() + resultTypes.size(), - rewriter.getMultiDimIdentityMap(rank)), - SmallVector(rank, getParallelIteratorTypeName())); - Region ®ion = genericOp.getRegion(); - rewriter.cloneRegionBefore(producer.getOperation()->getRegion(0), region, - region.begin()); - return cast(genericOp.getOperation()); - } - - static LinalgOp fuse(LinalgOp producer, TensorReshapeOp consumer, - unsigned consumerIdx, PatternRewriter &rewriter, - OperationFolder *folder = nullptr) { - if (isCollapsingAndFusible(producer, consumer, consumerIdx)) - return fuseCollapsingCase(producer, consumer, consumerIdx, rewriter); - if (isExpandingAndFusible(producer, consumer, consumerIdx)) - return fuseExpandingCase(producer, consumer, consumerIdx, rewriter); - return nullptr; - } -}; - -/// Implementation of fusion on tensor ops when producer is a splat constant. -struct FuseConstantOpAsProducer { - static bool isFusible(ConstantOp producer, LinalgOp consumer, - unsigned consumerIdx) { - return isa(consumer.getOperation()) && - consumer.hasTensorSemantics() && - producer.getResult().getType().isa() && - producer.value().cast().isSplat(); - } - - static LinalgOp fuse(ConstantOp producer, LinalgOp consumer, - unsigned consumerIdx, PatternRewriter &rewriter, - OperationFolder *folder = nullptr) { - if (!isFusible(producer, consumer, consumerIdx)) - return nullptr; - - // The indexing_maps for the operands of the fused operation are same as - // those for the operands of the consumer without the indexing map at - // consumerIdx - SmallVector fusedIndexMaps = - llvm::to_vector<4>(llvm::map_range( - consumer.indexing_maps(), [](Attribute attr) -> AffineMap { - return attr.cast().getValue(); - })); - fusedIndexMaps.erase(std::next(fusedIndexMaps.begin(), consumerIdx)); - - // The operands list is same as the consumer with the argument for constant - // index dropped. - Operation *consumerOp = consumer.getOperation(); - SmallVector fusedOperands(consumerOp->getOperands()); - fusedOperands.erase(std::next(fusedOperands.begin(), consumerIdx)); - - // Create a constant scalar value from the splat constant. - Value scalarConstant = rewriter.create( - producer.getLoc(), - producer.value().cast().getSplatValue()); - - LinalgOp fusedOp = createLinalgOpOfSameType( - consumer, rewriter, rewriter.getUnknownLoc(), - consumerOp->getResultTypes(), - /*inputs=*/fusedOperands, - /*outputBuffers=*/ValueRange{}, - /*initTensors=*/ValueRange{}, // no init tensors for now. - rewriter.getAffineMapArrayAttr(fusedIndexMaps), - consumer.iterator_types(), - /*doc=*/nullptr, - /*library_call=*/nullptr, - /*symbol_source=*/nullptr); - - // Map the block argument corresponding to the replaced argument with the - // scalar constant. - Region &consumerRegion = consumerOp->getRegion(0); - Block &entryBlock = *consumerRegion.begin(); - unsigned argIndex = entryBlock.getNumArguments() - - consumerOp->getNumOperands() + consumerIdx; - BlockAndValueMapping mapping; - mapping.map(entryBlock.getArgument(argIndex), scalarConstant); - Region &fusedRegion = fusedOp.getOperation()->getRegion(0); - rewriter.cloneRegionBefore(consumerRegion, fusedRegion, fusedRegion.begin(), - mapping); - return fusedOp; - } -}; -} // namespace - -Operation *mlir::linalg::fuseTensorOps(PatternRewriter &rewriter, - Operation *consumer, - unsigned consumerIdx, - OperationFolder *folder) { - if (consumerIdx >= consumer->getNumOperands()) - return nullptr; - Operation *producer = consumer->getOperand(consumerIdx).getDefiningOp(); - if (!producer || producer->getNumResults() != 1) - return nullptr; - - // Fuse when consumer is GenericOp or IndexedGenericOp. - if (isa(consumer)) { - if (isa(producer)) - return FuseGenericOpsOnTensors::fuse(cast(producer), - cast(consumer), - consumerIdx, rewriter, folder); - if (auto reshapeOpProducer = dyn_cast(producer)) - return FuseTensorReshapeOpAsProducer::fuse(reshapeOpProducer, - cast(consumer), - consumerIdx, rewriter, folder); - if (auto constantOpProducer = dyn_cast(producer)) - return FuseConstantOpAsProducer::fuse(constantOpProducer, - cast(consumer), - consumerIdx, rewriter, folder); - return nullptr; - } - - if (isa(producer)) { - // Fuse when consumer is a TensorReshapeOp. - if (TensorReshapeOp reshapeOp = dyn_cast(consumer)) { - return FuseTensorReshapeOpAsConsumer::fuse( - cast(producer), reshapeOp, consumerIdx, rewriter, folder); - } - } - - return nullptr; -} - namespace { -/// Patterns to fuse a generic op, with the producer of its operands. -template -struct FuseTensorOps : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(LinalgOpTy op, - PatternRewriter &rewriter) const override { - // Find the first operand that is defined by another generic op on tensors. - for (auto operandNum : - llvm::seq(0, op.getOperation()->getNumOperands())) { - Operation *producer = - op.getOperation()->getOperand(operandNum).getDefiningOp(); - if (Operation *fusedOp = fuseTensorOps(rewriter, op, operandNum)) { - rewriter.replaceOp(op, fusedOp->getResults()); - if (producer && llvm::all_of(producer->getResults(), - [](Value val) { return val.use_empty(); })) - rewriter.eraseOp(producer); - return success(); - } - } - return failure(); - } -}; - -/// Pass that fuses generic ops on tensors. Used only for testing. -struct FusionOfTensorOpsPass - : public LinalgFusionOfTensorOpsBase { - void runOnOperation() override { - OwningRewritePatternList patterns; - Operation *op = getOperation(); - populateLinalgTensorOpsFusionPatterns(op->getContext(), patterns); - applyPatternsAndFoldGreedily(op->getRegions(), patterns); - }; -}; - struct LinalgFusionPass : public LinalgFusionBase { void runOnFunction() override { fuseLinalgOpsGreedily(getFunction()); } }; } // namespace -void mlir::populateLinalgTensorOpsFusionPatterns( - MLIRContext *context, OwningRewritePatternList &patterns) { - patterns.insert, FuseTensorOps, - FuseTensorOps>(context); -} - std::unique_ptr> mlir::createLinalgFusionPass() { return std::make_unique(); } - -std::unique_ptr mlir::createLinalgFusionOfTensorOpsPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp new file mode 100644 index 0000000000000..a62b1ada2c187 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -0,0 +1,698 @@ +//===- Fusion.cpp - Implementation of linalg Fusion -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the linalg dialect Fusion on tensors operations pass. +// +//===----------------------------------------------------------------------===// +#include "PassDetail.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/IR/LinalgTypes.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" + +using namespace mlir; +using namespace mlir::linalg; + +namespace { + +/// Implementation of fusion of generic ops and indexed_generic ops. +struct FuseGenericOpsOnTensors { + static bool isFusible(LinalgOp producer, LinalgOp consumer, + unsigned consumerIdx) { + // Producer and consumer must have tensor semantics. + if (!producer.hasTensorSemantics() || !consumer.hasTensorSemantics()) + return false; + + // Verify that + // - the producer has all "parallel" iterator type. + if (producer.getNumParallelLoops() != producer.getNumLoops()) + return false; + + // Get the consumer index map. The number of results of the consumer index + // map must match the number of loops of the producer. + AffineMap consumerIndexMap = consumer.getIndexingMap(consumerIdx); + if (consumerIndexMap.getNumResults() != producer.getNumLoops()) + return false; + + // Finally the index_map for the result must be invertible. For now just + // verify it is a permutation. + AffineMap producerResultIndexMap = producer.getOutputIndexingMap(0); + return producerResultIndexMap.isPermutation(); + } + + static LinalgOp fuse(LinalgOp producer, LinalgOp consumer, + unsigned consumerIdx, PatternRewriter &rewriter, + OperationFolder *folder = nullptr) { + if (!isFusible(producer, consumer, consumerIdx)) + return nullptr; + + unsigned numFusedOperands = producer.getOperation()->getNumOperands() + + consumer.getOperation()->getNumOperands() - 1; + + // Compute the fused operands list, + SmallVector fusedOperands; + fusedOperands.reserve(numFusedOperands); + auto consumerOperands = consumer.getOperation()->getOperands(); + auto producerOperands = producer.getOperation()->getOperands(); + fusedOperands.assign(consumerOperands.begin(), + std::next(consumerOperands.begin(), consumerIdx)); + fusedOperands.append(producerOperands.begin(), producerOperands.end()); + fusedOperands.append(std::next(consumerOperands.begin(), consumerIdx + 1), + consumerOperands.end()); + + // Compute indexing_maps for the fused operation. The indexing_maps for the + // operands of the consumers that arent fused are the same. The + // indexing_maps for the producers need to be computed based on the + // indexing_map of the operand at consumerIdx in the consumer. + SmallVector fusedIndexMaps; + auto consumerIndexMaps = consumer.indexing_maps(); + fusedIndexMaps.reserve(fusedOperands.size() + + consumer.getOperation()->getNumResults()); + fusedIndexMaps.assign(consumerIndexMaps.begin(), + std::next(consumerIndexMaps.begin(), consumerIdx)); + // Compute indexing maps for the producer args in the fused operation. + computeProducerOperandIndex( + producer, consumer.getInputIndexingMap(consumerIdx), fusedIndexMaps); + + // Append the indexing maps for the remaining consumer operands. + fusedIndexMaps.append(std::next(consumerIndexMaps.begin(), consumerIdx + 1), + consumerIndexMaps.end()); + + // Generate the fused op. + // Tensor-level fusion is only on ops without initTensors and outputBuffers. + LinalgOp fusedOp; + if (isa(producer.getOperation()) && + isa(consumer.getOperation())) { + fusedOp = + rewriter + .create(consumer.getLoc(), + consumer.getOperation()->getResultTypes(), + /*inputs=*/fusedOperands, + /*outputBuffers=*/ValueRange{}, + /*initTensors=*/ValueRange{}, + rewriter.getArrayAttr(fusedIndexMaps), + consumer.iterator_types(), + /*doc=*/nullptr, + /*library_call=*/nullptr, + /*symbol_source=*/nullptr) + .getOperation(); + } else { + fusedOp = + rewriter + .create( + consumer.getLoc(), consumer.getOperation()->getResultTypes(), + /*inputs=*/fusedOperands, + /*outputBuffers=*/ValueRange{}, + /*initTensors=*/ValueRange{}, + rewriter.getArrayAttr(fusedIndexMaps), + consumer.iterator_types(), + /*doc=*/nullptr, + /*library_call=*/nullptr, + /*symbol_source=*/nullptr) + .getOperation(); + } + + // Construct an AffineMap from consumer loops to producer loops. + // consumer loop -> tensor index + AffineMap consumerResultIndexMap = + consumer.getInputIndexingMap(consumerIdx); + // producer loop -> tensor index + AffineMap producerResultIndexMap = producer.getOutputIndexingMap(0); + // tensor index -> producer loop + AffineMap invProducerResultIndexMap = + inversePermutation(producerResultIndexMap); + assert(invProducerResultIndexMap && + "expected producer result indexig map to be invertible"); + // consumer loop -> producer loop + AffineMap consumerToProducerLoopsMap = + invProducerResultIndexMap.compose(consumerResultIndexMap); + + generateFusedRegion(rewriter, fusedOp, producer, consumer, + consumerToProducerLoopsMap, consumerIdx, + consumer.getNumLoops()); + return fusedOp; + } + +private: + /// Append to `fusedOpIndexingMapAttrs` the indexing maps for the operands of + /// the `producer` to use in the fused operation given the indexing map of the + /// result of the producer in the consumer. + static void computeProducerOperandIndex( + LinalgOp producer, AffineMap fusedConsumerArgIndexMap, + SmallVectorImpl &fusedOpIndexingMapAttrs) { + // The indexing map in the consumer op (fusedConsumerArgIndexMap) is a map + // from consumer loop -> consumer arg tensor index/producer result tensor + // index. The fused loop is same as the consumer loop. For each producer arg + // the indexing map to be computed is a map from consumer loop -> producer + // arg tensor index. + + AffineMap producerResultIndexMap = producer.getOutputIndexingMap(0); + // producerResultIndexMap is a map from producer loop -> tensor index. + // Compute the inverse to get map from tensor index -> producer loop. + // The inverse is a map from producer result tensor index -> producer loop. + AffineMap invProducerResultIndexMap = + inversePermutation(producerResultIndexMap); + assert(invProducerResultIndexMap && + "expected producer result indexig map to be invertible"); + for (unsigned argNum : llvm::seq(0, producer.getNumInputs())) { + // argMap is a map from producer loop -> producer arg tensor index. + AffineMap argMap = producer.getInputIndexingMap(argNum); + + // Compose argMap with invProducerResultIndexMap to get a map from + // producer result tensor index -> producer arg tensor index. + AffineMap t1 = argMap.compose(invProducerResultIndexMap); + + // Compose t1 with fusedConsumerArgIndexMap gives an indexing map from + // consumer loop/ fused loop -> producer arg tensor index. + AffineMap indexingMap = t1.compose(fusedConsumerArgIndexMap); + fusedOpIndexingMapAttrs.push_back(AffineMapAttr::get(indexingMap)); + } + } + + /// Generate the region of the fused operation. The region of the fused op + /// must be empty. + static void generateFusedRegion(PatternRewriter &rewriter, Operation *fusedOp, + LinalgOp producer, LinalgOp consumer, + AffineMap consumerToProducerLoopsMap, + unsigned consumerIdx, unsigned nloops) { + // Build the region of the fused op. + Block &producerBlock = producer.getOperation()->getRegion(0).front(); + Block &consumerBlock = consumer.getOperation()->getRegion(0).front(); + Block *fusedBlock = new Block(); + fusedOp->getRegion(0).push_back(fusedBlock); + BlockAndValueMapping mapper; + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(fusedBlock); + + // The block arguments are + // [index_0, index_1, ... , + // consumer_operand_0, ... , consumer_operand_(`consumerIdx`-1), + // producer_operand_0, ... , producer_operand_(n-1)], + // consumer_operand_(`consumerIdx`), .. consumer_operand_(m-1)] + // , where n is the number of producer's operand and m is the number + // consumer's operand. + // If both `numProducerIndices` and `numConsumerIndices` are zero, this is a + // generic op. In this case, there are no indices in block arguments. + unsigned numProducerIndices = + isa(producer.getOperation()) ? nloops : 0; + unsigned numConsumerIndices = + isa(consumer.getOperation()) ? nloops : 0; + // Firstly, add all the indices to the block arguments. + for (unsigned i = 0, e = std::max(numProducerIndices, numConsumerIndices); + i < e; ++i) + fusedBlock->addArgument(rewriter.getIndexType()); + // Map the arguments for the unmodified args from the consumer. + for (auto consumerArg : llvm::enumerate(consumerBlock.getArguments())) { + if (consumerArg.index() == consumerIdx + numConsumerIndices) { + // Map the arguments for the args from the producer. + for (auto producerArg : llvm::enumerate(producerBlock.getArguments())) { + // If producer is an indexed_generic op, map the indices from consumer + // loop to producer loop (because the fusedOp is built based on + // consumer's perspective). + if (producerArg.index() < numProducerIndices) { + auto newIndex = rewriter.create( + producer.getLoc(), + consumerToProducerLoopsMap.getSubMap(producerArg.index()), + fusedBlock->getArguments().take_front(nloops)); + mapper.map(producerArg.value(), newIndex); + } else { + mapper.map(producerArg.value(), + fusedBlock->addArgument(producerArg.value().getType())); + } + } + continue; + } + + // If consumer is an indexed_generic op, map the indices to the block + // arguments directly. Otherwise, add the same type of arugment and map to + // it. + if (consumerArg.index() < numConsumerIndices) { + mapper.map(consumerArg.value(), + fusedBlock->getArgument(consumerArg.index())); + } else { + mapper.map(consumerArg.value(), + fusedBlock->addArgument(consumerArg.value().getType())); + } + } + + // Add operations from producer (except the yield operation) to the fused + // op. + for (auto &op : producerBlock.getOperations()) { + if (auto yieldOp = dyn_cast(op)) { + // Lookup the value the yield operation is mapped to. + Value yieldVal = yieldOp.getOperand(0); + if (Value clonedVal = mapper.lookupOrNull(yieldVal)) + mapper.map( + consumerBlock.getArgument(consumerIdx + numConsumerIndices), + clonedVal); + continue; + } + rewriter.clone(op, mapper); + } + for (auto &op : consumerBlock.getOperations()) + rewriter.clone(op, mapper); + } +}; +} // namespace + +/// Linearize the expressions in `sourceMap` based on the `reassociationMaps` +/// provided, given the shape of the source tensor that corresponds to the +/// `sourceMap`. Note that this implicitly assumes that the tensors dimensions +/// are "row-major" ordered logically. +/// +/// For example: +/// +/// %0 = op ... : tensor +/// with output index_map `affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>` +/// +/// and reshape: +/// %1 = linalg.tensor_reshape %0 [affine_map<(i, j, k, l) -> (i)>, +/// affine_map<(i, j, k, l) -> (j, k, l)>] : +/// tensor into tensor +/// +/// would be rewritten into: +/// %0 = op ... : tensor +/// with output index_map +/// `affine_map<(d0, d1, d2, d3) -> (d0, d1 * 20 + d2 * 5 + d3)>` +static AffineMap linearizeCollapsedDims(AffineMap sourceMap, + ArrayRef sourceShape, + ArrayRef reassociationMaps) { + SmallVector resultExprs; + resultExprs.reserve(reassociationMaps.size()); + ArrayRef sourceExprs = sourceMap.getResults(); + MLIRContext *context = sourceMap.getContext(); + + // Compute the result exprs based on the reassociation maps. + for (AffineMap map : reassociationMaps) { + ArrayRef collapsedDims = map.getResults(); + // Assume that they are in-order and contiguous (already checked in + // verifier). + assert(!collapsedDims.empty()); + unsigned startDim = + collapsedDims.front().cast().getPosition(); + AffineExpr linearizedExpr = makeCanonicalStridedLayoutExpr( + sourceShape.slice(startDim, collapsedDims.size()), + sourceExprs.slice(startDim, collapsedDims.size()), context); + resultExprs.push_back(linearizedExpr); + } + return AffineMap::get(sourceMap.getNumDims(), sourceMap.getNumSymbols(), + resultExprs, context); +} + +/// Checks if the `reshapeOp` can be fused with it consumer (if `asProducer` is +/// true) or its producer (if `asProducer` is false) given the indexing map at +/// its use. +static bool isTensorReshapeOpFusible(TensorReshapeOp reshapeOp, + AffineMap useIndexMap, bool asProducer) { + RankedTensorType returnType = reshapeOp.getResultType(); + RankedTensorType operandType = reshapeOp.getSrcType(); + // Reshape is fusible with its consumer (i.e. reshape as a producer) when its + // operand is of lesser rank than the result. Fusing when operand has higher + // rank will require use of mods and divs in the indexing maps of the fused op + // which would make it non-invertible. Similarly reshape is fused with its + // producer (i.e. reshape as consumer) only if the return type has lesser + // rank. + if ((asProducer && returnType.getRank() < operandType.getRank()) || + (!asProducer && operandType.getRank() < returnType.getRank())) + return false; + return useIndexMap.isIdentity(); +} + +/// Based on the type of `op` create a linalg op of the same type, i.e. if `op` +/// is a linalg.generic operation, the create a `linalg.generic` operation with +/// the given `args`. Expects `op` to be `linalg.generic` or +/// `linalg.indexed_generic`. +template +static LinalgOp createLinalgOpOfSameType(LinalgOp op, PatternRewriter &rewriter, + Args... args) { + if (isa(op.getOperation())) + return cast(rewriter.create(args...).getOperation()); + if (isa(op.getOperation())) + return cast( + rewriter.create(args...).getOperation()); + llvm_unreachable( + "expected only linalg.generic or linalg.indexed_generic ops"); + return nullptr; +} + +namespace { + +/// Implementation of fusion on tensor ops when producer is a TensorReshapeOp. +struct FuseTensorReshapeOpAsProducer { + static bool isFusible(TensorReshapeOp producer, LinalgOp consumer, + unsigned consumerIdx) { + return isa(consumer.getOperation()) && + consumer.hasTensorSemantics() && + isTensorReshapeOpFusible(producer, + consumer.getInputIndexingMap(consumerIdx), + /*asProducer=*/true); + } + + static LinalgOp fuse(TensorReshapeOp producer, LinalgOp consumer, + unsigned consumerIdx, PatternRewriter &rewriter, + OperationFolder *folder = nullptr) { + if (producer.src().getDefiningOp()) + return nullptr; + + if (!isFusible(producer, consumer, consumerIdx)) + return nullptr; + + // Compute the fused operands list, + Operation *consumerOp = consumer.getOperation(); + SmallVector fusedOperands(consumerOp->getOperands()); + fusedOperands[consumerIdx] = producer.src(); + + // Compute indexing_maps for the fused operation. The indexing_maps for the + // operands of the consumers that arent fused are the same. + SmallVector fusedIndexMaps = + llvm::to_vector<4>(llvm::map_range( + consumer.indexing_maps(), [](Attribute attr) -> AffineMap { + return attr.cast().getValue(); + })); + + // Compute the indexing map to use for the operand of the producer. + AffineMap modifiedMap = linearizeCollapsedDims( + fusedIndexMaps[consumerIdx], producer.getResultType().getShape(), + producer.getReassociationMaps()); + for (AffineExpr expr : modifiedMap.getResults()) { + if (!expr.isPureAffine()) + return nullptr; + } + fusedIndexMaps[consumerIdx] = modifiedMap; + + // Further check that the resulting index maps can be fused and + // inverted. Without this the resultant op is not legal. + if (!inversePermutation(concatAffineMaps(fusedIndexMaps))) + return nullptr; + + SmallVector indexMapAttrs = llvm::to_vector<4>( + llvm::map_range(fusedIndexMaps, [](AffineMap map) -> Attribute { + return AffineMapAttr::get(map); + })); + LinalgOp fusedOp = createLinalgOpOfSameType( + consumer, rewriter, rewriter.getUnknownLoc(), + consumerOp->getResultTypes(), + /*inputs=*/fusedOperands, + /*outputBuffers=*/ValueRange{}, + /*initTensors=*/ValueRange{}, // no init tensors for now. + rewriter.getArrayAttr(indexMapAttrs), consumer.iterator_types(), + /*doc=*/nullptr, + /*library_call=*/nullptr, + /*symbol_source=*/nullptr); + auto &fusedRegion = fusedOp.getOperation()->getRegion(0); + rewriter.cloneRegionBefore(consumerOp->getRegion(0), fusedRegion, + fusedRegion.begin()); + return fusedOp; + } +}; + +/// Implementation of fusion on tensor ops when consumer is a TensorReshapeOp. +struct FuseTensorReshapeOpAsConsumer { + static bool isCollapsingAndFusible(LinalgOp producer, + TensorReshapeOp consumer, + unsigned consumerIdx) { + return isa(producer.getOperation()) && + producer.hasTensorSemantics() && + isTensorReshapeOpFusible(consumer, producer.getOutputIndexingMap(0), + /*asProducer=*/false); + } + + static LinalgOp fuseCollapsingCase(LinalgOp producer, + TensorReshapeOp consumer, + unsigned consumerIdx, + PatternRewriter &rewriter) { + // The indexing_maps for the operands of the fused operation are same as + // those for the operands of the producer. + SmallVector fusedIndexMaps = + llvm::to_vector<4>(llvm::map_range( + producer.indexing_maps(), [](Attribute attr) -> AffineMap { + return attr.cast().getValue(); + })); + // Compute the indexing map to use for the operand of the producer. + AffineMap modifiedMap = linearizeCollapsedDims( + producer.getOutputIndexingMap(0), consumer.getSrcType().getShape(), + consumer.getReassociationMaps()); + for (AffineExpr expr : modifiedMap.getResults()) { + if (!expr.isPureAffine()) + return nullptr; + } + fusedIndexMaps.back() = modifiedMap; + + // Further check that the resulting index maps can be fused and + // inverted. Without this the resultant op is not legal. + if (!inversePermutation(concatAffineMaps(fusedIndexMaps))) + return nullptr; + + SmallVector indexMapAttrs = llvm::to_vector<4>( + llvm::map_range(fusedIndexMaps, [](AffineMap map) -> Attribute { + return AffineMapAttr::get(map); + })); + + Operation *producerOp = producer.getOperation(); + LinalgOp fusedOp = createLinalgOpOfSameType( + producer, rewriter, rewriter.getUnknownLoc(), consumer.getResultType(), + /*inputs=*/producerOp->getOperands(), + /*outputBuffers=*/ValueRange{}, + /*initTensors=*/ValueRange{}, // no init tensors for now. + rewriter.getArrayAttr(indexMapAttrs), producer.iterator_types(), + /*doc=*/nullptr, + /*library_call=*/nullptr, + /*symbol_source=*/nullptr); + auto &fusedRegion = fusedOp.getOperation()->getRegion(0); + rewriter.cloneRegionBefore(producerOp->getRegion(0), fusedRegion, + fusedRegion.begin()); + return fusedOp; + } + + static bool isExpandingAndFusible(LinalgOp producer, TensorReshapeOp consumer, + unsigned consumerIdx) { + // Is fusible only if: + // 1) The producer is a generic op. + // 2) The producer has tensor semantics. + // 3) The tensor reshape op is a expanding case. + // 4) All the shapes are the same for the generic op. + // 5) All the indexing maps in producer are identity. + // 6) All the loops in producer are parallel loops. + // 7) The producer has a single user. + auto types = producer.getInputOutputShapedTypes(); + assert(!types.empty()); + return isa(producer.getOperation()) && + producer.hasTensorSemantics() && + consumer.getSrcType().getRank() < + consumer.getResultType().getRank() && + std::equal(types.begin() + 1, types.end(), types.begin()) && + llvm::all_of(producer.getIndexingMaps(), + [](AffineMap map) { return map.isIdentity(); }) && + llvm::all_of(producer.iterator_types(), + [](Attribute attr) { + return attr.cast().getValue() == + getParallelIteratorTypeName(); + }) && + producer.getOperation()->hasOneUse(); + } + + static LinalgOp fuseExpandingCase(LinalgOp producer, TensorReshapeOp consumer, + unsigned consumerIdx, + PatternRewriter &rewriter) { + Location loc = producer.getLoc(); + auto dstShape = consumer.getResultType().cast().getShape(); + SmallVector args; + for (auto arg : producer.getOperation()->getOperands()) { + auto type = RankedTensorType::get( + dstShape, arg.getType().cast().getElementType()); + args.push_back(rewriter.createOrFold( + loc, type, arg, consumer.reassociation())); + } + + SmallVector resultTypes; + for (auto t : producer.getOutputTensorTypes()) { + Type type = RankedTensorType::get(dstShape, + t.cast().getElementType()); + resultTypes.push_back(type); + } + + int rank = dstShape.size(); + auto genericOp = rewriter.create( + loc, resultTypes, /*inputs=*/args, + /*outputBuffers=*/ValueRange{}, + /*initTensors=*/ValueRange{}, + SmallVector(args.size() + resultTypes.size(), + rewriter.getMultiDimIdentityMap(rank)), + SmallVector(rank, getParallelIteratorTypeName())); + Region ®ion = genericOp.getRegion(); + rewriter.cloneRegionBefore(producer.getOperation()->getRegion(0), region, + region.begin()); + return cast(genericOp.getOperation()); + } + + static LinalgOp fuse(LinalgOp producer, TensorReshapeOp consumer, + unsigned consumerIdx, PatternRewriter &rewriter, + OperationFolder *folder = nullptr) { + if (isCollapsingAndFusible(producer, consumer, consumerIdx)) + return fuseCollapsingCase(producer, consumer, consumerIdx, rewriter); + if (isExpandingAndFusible(producer, consumer, consumerIdx)) + return fuseExpandingCase(producer, consumer, consumerIdx, rewriter); + return nullptr; + } +}; + +/// Implementation of fusion on tensor ops when producer is a splat constant. +struct FuseConstantOpAsProducer { + static bool isFusible(ConstantOp producer, LinalgOp consumer, + unsigned consumerIdx) { + return isa(consumer.getOperation()) && + consumer.hasTensorSemantics() && + producer.getResult().getType().isa() && + producer.value().cast().isSplat(); + } + + static LinalgOp fuse(ConstantOp producer, LinalgOp consumer, + unsigned consumerIdx, PatternRewriter &rewriter, + OperationFolder *folder = nullptr) { + if (!isFusible(producer, consumer, consumerIdx)) + return nullptr; + + // The indexing_maps for the operands of the fused operation are same as + // those for the operands of the consumer without the indexing map at + // consumerIdx + SmallVector fusedIndexMaps = + llvm::to_vector<4>(llvm::map_range( + consumer.indexing_maps(), [](Attribute attr) -> AffineMap { + return attr.cast().getValue(); + })); + fusedIndexMaps.erase(std::next(fusedIndexMaps.begin(), consumerIdx)); + + // The operands list is same as the consumer with the argument for constant + // index dropped. + Operation *consumerOp = consumer.getOperation(); + SmallVector fusedOperands(consumerOp->getOperands()); + fusedOperands.erase(std::next(fusedOperands.begin(), consumerIdx)); + + // Create a constant scalar value from the splat constant. + Value scalarConstant = rewriter.create( + producer.getLoc(), + producer.value().cast().getSplatValue()); + + LinalgOp fusedOp = createLinalgOpOfSameType( + consumer, rewriter, rewriter.getUnknownLoc(), + consumerOp->getResultTypes(), + /*inputs=*/fusedOperands, + /*outputBuffers=*/ValueRange{}, + /*initTensors=*/ValueRange{}, // no init tensors for now. + rewriter.getAffineMapArrayAttr(fusedIndexMaps), + consumer.iterator_types(), + /*doc=*/nullptr, + /*library_call=*/nullptr, + /*symbol_source=*/nullptr); + + // Map the block argument corresponding to the replaced argument with the + // scalar constant. + Region &consumerRegion = consumerOp->getRegion(0); + Block &entryBlock = *consumerRegion.begin(); + unsigned argIndex = entryBlock.getNumArguments() - + consumerOp->getNumOperands() + consumerIdx; + BlockAndValueMapping mapping; + mapping.map(entryBlock.getArgument(argIndex), scalarConstant); + Region &fusedRegion = fusedOp.getOperation()->getRegion(0); + rewriter.cloneRegionBefore(consumerRegion, fusedRegion, fusedRegion.begin(), + mapping); + return fusedOp; + } +}; +} // namespace + +Operation *mlir::linalg::fuseTensorOps(PatternRewriter &rewriter, + Operation *consumer, + unsigned consumerIdx, + OperationFolder *folder) { + if (consumerIdx >= consumer->getNumOperands()) + return nullptr; + Operation *producer = consumer->getOperand(consumerIdx).getDefiningOp(); + if (!producer || producer->getNumResults() != 1) + return nullptr; + + // Fuse when consumer is GenericOp or IndexedGenericOp. + if (isa(consumer)) { + if (isa(producer)) + return FuseGenericOpsOnTensors::fuse(cast(producer), + cast(consumer), + consumerIdx, rewriter, folder); + if (auto reshapeOpProducer = dyn_cast(producer)) + return FuseTensorReshapeOpAsProducer::fuse(reshapeOpProducer, + cast(consumer), + consumerIdx, rewriter, folder); + if (auto constantOpProducer = dyn_cast(producer)) + return FuseConstantOpAsProducer::fuse(constantOpProducer, + cast(consumer), + consumerIdx, rewriter, folder); + return nullptr; + } + + if (isa(producer)) { + // Fuse when consumer is a TensorReshapeOp. + if (TensorReshapeOp reshapeOp = dyn_cast(consumer)) { + return FuseTensorReshapeOpAsConsumer::fuse( + cast(producer), reshapeOp, consumerIdx, rewriter, folder); + } + } + + return nullptr; +} + +namespace { +/// Patterns to fuse a generic op, with the producer of its operands. +template +struct FuseTensorOps : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(LinalgOpTy op, + PatternRewriter &rewriter) const override { + // Find the first operand that is defined by another generic op on tensors. + for (auto operandNum : + llvm::seq(0, op.getOperation()->getNumOperands())) { + Operation *producer = + op.getOperation()->getOperand(operandNum).getDefiningOp(); + if (Operation *fusedOp = fuseTensorOps(rewriter, op, operandNum)) { + rewriter.replaceOp(op, fusedOp->getResults()); + if (producer && llvm::all_of(producer->getResults(), + [](Value val) { return val.use_empty(); })) + rewriter.eraseOp(producer); + return success(); + } + } + return failure(); + } +}; + +/// Pass that fuses generic ops on tensors. Used only for testing. +struct FusionOfTensorOpsPass + : public LinalgFusionOfTensorOpsBase { + void runOnOperation() override { + OwningRewritePatternList patterns; + Operation *op = getOperation(); + populateLinalgTensorOpsFusionPatterns(op->getContext(), patterns); + applyPatternsAndFoldGreedily(op->getRegions(), patterns); + }; +}; +} // namespace + +void mlir::populateLinalgTensorOpsFusionPatterns( + MLIRContext *context, OwningRewritePatternList &patterns) { + patterns.insert, FuseTensorOps, + FuseTensorOps>(context); +} + +std::unique_ptr mlir::createLinalgFusionOfTensorOpsPass() { + return std::make_unique(); +} From dcb5b6dfbfb5dafb66797e8dba2f04eb76a153b7 Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Wed, 30 Sep 2020 18:00:18 -0700 Subject: [PATCH 288/544] [runtimes] Remove TOOLCHAIN_TOOLS specialization https://reviews.llvm.org/D88310 fixed the AIX issue in LLVMExternalProjectUtils, so we shouldn't need the workaround in the runtimes build anymore. I'm reverting it because it prevents the target-specific tool selection in LLVMExternalProjectUtils from taking effect, which we rely on for our runtimes builds. Reviewed By: daltenty Differential Revision: https://reviews.llvm.org/D88627 --- llvm/runtimes/CMakeLists.txt | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index c96fc8a1f5d7d..73470074ac6cf 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -298,11 +298,6 @@ ${error} Set RUNTIMES_BUILD_ALLOW_DARWIN to allow a single darwin triple.") endif() endforeach() - # 64-bit XCOFF and big AR format is not yet supported in some of these tools. - if(NOT target MATCHES aix) - set(${target}_toolchain_tools lld llvm-ar llvm-lipo llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip) - endif() - llvm_ExternalProject_Add(builtins-${target} ${compiler_rt_path}/lib/builtins DEPENDS ${ARG_DEPENDS} @@ -316,7 +311,6 @@ ${error} Set RUNTIMES_BUILD_ALLOW_DARWIN to allow a single darwin triple.") -DCMAKE_ASM_COMPILER_WORKS=ON -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON ${${target}_extra_args} - TOOLCHAIN_TOOLS clang ${${target}_toolchain_tools} USE_TOOLCHAIN ${EXTRA_ARGS}) endfunction() @@ -524,11 +518,6 @@ ${error} Set RUNTIMES_BUILD_ALLOW_DARWIN to allow a single darwin triple.") list(APPEND EXTRA_ARGS STRIP_TOOL ${CMAKE_CURRENT_BINARY_DIR}/llvm-strip-link) endif() - # 64-bit XCOFF and big AR format is not yet supported in some of these tools. - if(NOT target MATCHES aix) - set(${name}_toolchain_tools lld llvm-ar llvm-lipo llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip) - endif() - llvm_ExternalProject_Add(runtimes-${name} ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${${name}_deps} ${CXX_HEADER_TARGET} @@ -547,7 +536,6 @@ ${error} Set RUNTIMES_BUILD_ALLOW_DARWIN to allow a single darwin triple.") -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON -DLLVM_RUNTIMES_TARGET=${name} ${${name}_extra_args} - TOOLCHAIN_TOOLS clang ${${name}_toolchain_tools} EXTRA_TARGETS ${${name}_extra_targets} ${${name}_test_targets} USE_TOOLCHAIN From 17640c5aac649c154959ca1075953f0d252a4a5b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 30 Sep 2020 10:36:11 -0700 Subject: [PATCH 289/544] [NFC] Let (MC)Register APIs check isStackSlot The user is expected to make the isStackSlot check before calling isPhysicalRegister or isVirtualRegister. The APIs assert otherwise. We can improve the usability of these APIs by carrying out the check in the 2 APIs: they become a complete "source of truth" and remove an extra responsibility from the user. Differential Revision: https://reviews.llvm.org/D88598 --- llvm/include/llvm/CodeGen/Register.h | 7 +------ llvm/include/llvm/MC/MCRegister.h | 7 ++----- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index 054040cd29a1f..884c8bc7dc2ec 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -40,10 +40,6 @@ class Register { /// frame index in a variable that normally holds a register. isStackSlot() /// returns true if Reg is in the range used for stack slots. /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. - /// static bool isStackSlot(unsigned Reg) { return MCRegister::isStackSlot(Reg); } @@ -69,8 +65,7 @@ class Register { /// Return true if the specified register number is in /// the virtual register namespace. static bool isVirtualRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return Reg & MCRegister::VirtualRegFlag; + return Reg & MCRegister::VirtualRegFlag && !isStackSlot(Reg); } /// Convert a virtual register number to a 0-based index. diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h index 1f3c4b8494cc1..5f2e31b70fd8d 100644 --- a/llvm/include/llvm/MC/MCRegister.h +++ b/llvm/include/llvm/MC/MCRegister.h @@ -46,9 +46,6 @@ class MCRegister { /// register. StackSlot values do not exist in the MC layer, see /// Register::isStackSlot() for the more information on them. /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. static bool isStackSlot(unsigned Reg) { return !(Reg & VirtualRegFlag) && uint32_t(Reg & ~VirtualRegFlag) >= FirstStackSlot; @@ -57,8 +54,8 @@ class MCRegister { /// Return true if the specified register number is in /// the physical register namespace. static bool isPhysicalRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return Reg >= FirstPhysicalReg && !(Reg & VirtualRegFlag); + return Reg >= FirstPhysicalReg && !(Reg & VirtualRegFlag) && + !isStackSlot(Reg); } /// Return true if the specified register number is in the physical register From cdfb95ad580fbf366a9bffc5082df22e9d2b5fa3 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 09:32:48 -0700 Subject: [PATCH 290/544] [flang] Add checks for misuse of formatted I/O APIs in unformatted I/O statement Add checking to I/O statement APIs to catch cases where the formatted I/O data item transfer routines like OutputInteger64 are being incorrectly used for unformatted I/O, which should use the unformatted block or descriptor-based data item interfaces. Differential revision: https://reviews.llvm.org/D88672 --- flang/runtime/io-api.cpp | 38 ++++++++++++++++++++++++++++++------- flang/runtime/io-api.h | 4 ++++ flang/runtime/io-stmt.h | 38 ++++++++++++++++++++++++------------- flang/runtime/type-code.cpp | 6 +++--- flang/runtime/unit.cpp | 2 +- 5 files changed, 64 insertions(+), 24 deletions(-) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 18c3f8241f08f..304c40e871f46 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -922,14 +922,16 @@ bool IONAME(InputUnformattedBlock)( } bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) { + cookie->CheckFormattedStmtType("OutputInteger64"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( - TypeCategory::Integer, 8, reinterpret_cast(&n), 0); + TypeCategory::Integer, sizeof n, reinterpret_cast(&n), 0); return descr::DescriptorIO(*cookie, descriptor); } bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { + cookie->CheckFormattedStmtType("InputInteger"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( @@ -938,6 +940,7 @@ bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { } bool IONAME(OutputReal32)(Cookie cookie, float x) { + cookie->CheckFormattedStmtType("OutputReal32"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish(TypeCategory::Real, 4, reinterpret_cast(&x), 0); @@ -945,6 +948,7 @@ bool IONAME(OutputReal32)(Cookie cookie, float x) { } bool IONAME(OutputReal64)(Cookie cookie, double x) { + cookie->CheckFormattedStmtType("OutputReal64"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish(TypeCategory::Real, 8, reinterpret_cast(&x), 0); @@ -952,6 +956,7 @@ bool IONAME(OutputReal64)(Cookie cookie, double x) { } bool IONAME(InputReal32)(Cookie cookie, float &x) { + cookie->CheckFormattedStmtType("InputReal32"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish(TypeCategory::Real, 4, reinterpret_cast(&x), 0); @@ -959,6 +964,7 @@ bool IONAME(InputReal32)(Cookie cookie, float &x) { } bool IONAME(InputReal64)(Cookie cookie, double &x) { + cookie->CheckFormattedStmtType("InputReal64"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish(TypeCategory::Real, 8, reinterpret_cast(&x), 0); @@ -966,6 +972,7 @@ bool IONAME(InputReal64)(Cookie cookie, double &x) { } bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { + cookie->CheckFormattedStmtType("OutputComplex32"); float z[2]{r, i}; StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; @@ -975,6 +982,7 @@ bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { } bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { + cookie->CheckFormattedStmtType("OutputComplex64"); double z[2]{r, i}; StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; @@ -984,6 +992,7 @@ bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { } bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { + cookie->CheckFormattedStmtType("InputComplex32"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( @@ -992,6 +1001,7 @@ bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { } bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { + cookie->CheckFormattedStmtType("InputComplex64"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( @@ -999,34 +1009,48 @@ bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { +bool IONAME(OutputCharacter)( + Cookie cookie, const char *x, std::size_t length, int kind) { + cookie->CheckFormattedStmtType("OutputCharacter"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( - 1, length, reinterpret_cast(const_cast(x)), 0); + kind, length, reinterpret_cast(const_cast(x)), 0); return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputAscii)(Cookie cookie, char *x, std::size_t length) { +bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { + return IONAME(OutputCharacter(cookie, x, length, 1)); +} + +bool IONAME(InputCharacter)( + Cookie cookie, char *x, std::size_t length, int kind) { + cookie->CheckFormattedStmtType("InputCharacter"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; - descriptor.Establish(1, length, reinterpret_cast(x), 0); + descriptor.Establish(kind, length, reinterpret_cast(x), 0); return descr::DescriptorIO(*cookie, descriptor); } +bool IONAME(InputAscii)(Cookie cookie, char *x, std::size_t length) { + return IONAME(InputCharacter(cookie, x, length, 1)); +} + bool IONAME(OutputLogical)(Cookie cookie, bool truth) { + cookie->CheckFormattedStmtType("OutputLogical"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( - TypeCategory::Logical, 1, reinterpret_cast(&truth), 0); + TypeCategory::Logical, sizeof truth, reinterpret_cast(&truth), 0); return descr::DescriptorIO(*cookie, descriptor); } bool IONAME(InputLogical)(Cookie cookie, bool &truth) { + cookie->CheckFormattedStmtType("InputLogical"); StaticDescriptor staticDescriptor; Descriptor &descriptor{staticDescriptor.descriptor()}; descriptor.Establish( - TypeCategory::Logical, 1, reinterpret_cast(&truth), 0); + TypeCategory::Logical, sizeof truth, reinterpret_cast(&truth), 0); return descr::DescriptorIO(*cookie, descriptor); } diff --git a/flang/runtime/io-api.h b/flang/runtime/io-api.h index 369013fee8bc1..80a6de95069cc 100644 --- a/flang/runtime/io-api.h +++ b/flang/runtime/io-api.h @@ -231,10 +231,12 @@ bool IONAME(SetSign)(Cookie, const char *, std::size_t); // and avoid the following items when they might crash. bool IONAME(OutputDescriptor)(Cookie, const Descriptor &); bool IONAME(InputDescriptor)(Cookie, const Descriptor &); +// Contiguous transfers for unformatted I/O bool IONAME(OutputUnformattedBlock)( Cookie, const char *, std::size_t, std::size_t elementBytes); bool IONAME(InputUnformattedBlock)( Cookie, char *, std::size_t, std::size_t elementBytes); +// Formatted (including list directed) I/O data items bool IONAME(OutputInteger64)(Cookie, std::int64_t); bool IONAME(InputInteger)(Cookie, std::int64_t &, int kind = 8); bool IONAME(OutputReal32)(Cookie, float); @@ -245,7 +247,9 @@ bool IONAME(OutputComplex32)(Cookie, float, float); bool IONAME(InputComplex32)(Cookie, float[2]); bool IONAME(OutputComplex64)(Cookie, double, double); bool IONAME(InputComplex64)(Cookie, double[2]); +bool IONAME(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); bool IONAME(OutputAscii)(Cookie, const char *, std::size_t); +bool IONAME(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); bool IONAME(InputAscii)(Cookie, char *, std::size_t); bool IONAME(OutputLogical)(Cookie, bool); bool IONAME(InputLogical)(Cookie, bool &); diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 3c82dc8b1b0a0..343619bc121cb 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -43,6 +43,13 @@ class ExternalFormattedIoStatementState; template class ExternalListIoStatementState; template class UnformattedIoStatementState; +struct InputStatementState {}; +struct OutputStatementState {}; +template +using IoDirectionState = std::conditional_t; +struct FormattedIoStatementState {}; + // The Cookie type in the I/O API is a pointer (for C) to this class. class IoStatementState { public: @@ -90,6 +97,15 @@ class IoStatementState { std::optional NextInField(std::optional &remaining); std::optional GetNextNonBlank(); // can advance record + template void CheckFormattedStmtType(const char *name) { + if (!get_if() || + !get_if>()) { + GetIoErrorHandler().Crash( + "%s called for I/O statement that is not formatted %s", name, + D == Direction::Output ? "output" : "input"); + } + } + private: std::variant, std::reference_wrapper, @@ -132,17 +148,11 @@ struct IoStatementBase : public DefaultFormatControlCallbacks { void BadInquiryKeywordHashCrash(InquiryKeywordHash); }; -struct InputStatementState {}; -struct OutputStatementState {}; -template -using IoDirectionState = std::conditional_t; - -struct FormattedStatementState {}; - // Common state for list-directed internal & external I/O -template struct ListDirectedStatementState {}; -template <> struct ListDirectedStatementState { +template struct ListDirectedStatementState; +template <> +struct ListDirectedStatementState + : public FormattedIoStatementState { static std::size_t RemainingSpaceInRecord(const ConnectionState &); bool NeedAdvance(const ConnectionState &, std::size_t) const; bool EmitLeadingSpaceOrAdvance( @@ -151,7 +161,9 @@ template <> struct ListDirectedStatementState { IoStatementState &, int maxRepeat = 1); bool lastWasUndelimitedCharacter{false}; }; -template <> class ListDirectedStatementState { +template <> +class ListDirectedStatementState + : public FormattedIoStatementState { public: // Skips value separators, handles repetition and null values. // Vacant when '/' appears; present with descriptor == ListDirectedNullValue @@ -199,7 +211,7 @@ class InternalIoStatementState : public IoStatementBase, template class InternalFormattedIoStatementState : public InternalIoStatementState, - public FormattedStatementState { + public FormattedIoStatementState { public: using CharType = CHAR; using typename InternalIoStatementState::Buffer; @@ -275,7 +287,7 @@ class ExternalIoStatementState : public ExternalIoStatementBase, template class ExternalFormattedIoStatementState : public ExternalIoStatementState, - public FormattedStatementState { + public FormattedIoStatementState { public: using CharType = CHAR; ExternalFormattedIoStatementState(ExternalFileUnit &, const CharType *format, diff --git a/flang/runtime/type-code.cpp b/flang/runtime/type-code.cpp index 3fda906516ede..19de2ef2e58ef 100644 --- a/flang/runtime/type-code.cpp +++ b/flang/runtime/type-code.cpp @@ -78,13 +78,13 @@ TypeCode::TypeCode(TypeCategory f, int kind) { raw_ = CFI_type_Bool; break; case 2: - raw_ = CFI_type_int16_t; + raw_ = CFI_type_int_fast16_t; break; case 4: - raw_ = CFI_type_int32_t; + raw_ = CFI_type_int_fast32_t; break; case 8: - raw_ = CFI_type_int64_t; + raw_ = CFI_type_int_fast64_t; break; } break; diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 8170fbc696c21..bcb8a478ad59f 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -290,7 +290,7 @@ bool ExternalFileUnit::Receive(char *data, std::size_t bytes, furthestPositionInRecord = furthestAfter; return true; } else { - handler.SignalEnd(); + // EOF or error: can be handled & has been signaled endfileRecordNumber = currentRecordNumber; return false; } From 9d40fb808fd0fbd33eb3b50c20d7f402de5db91e Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 1 Oct 2020 10:08:33 -0700 Subject: [PATCH 291/544] Allow to specify macro names for android-comparison-in-temp-failure-retry Some projects do not use the TEMP_FAILURE_RETRY macro but define their own one, as not to depend on glibc / Bionic details. By allowing the user to override the list of macros, these projects can also benefit from this check. Differential Revision: https://reviews.llvm.org/D83144 --- .../ComparisonInTempFailureRetryCheck.cpp | 69 +++++++++++-------- .../ComparisonInTempFailureRetryCheck.h | 11 ++- ...droid-comparison-in-temp-failure-retry.rst | 7 ++ ...rison-in-temp-failure-retry-custom-macro.c | 46 +++++++++++++ 4 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/android-comparison-in-temp-failure-retry-custom-macro.c diff --git a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp index 188d44da51d81..c7b9896c64f81 100644 --- a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp @@ -18,32 +18,17 @@ namespace clang { namespace tidy { namespace android { -namespace { -AST_MATCHER(BinaryOperator, isRHSATempFailureRetryArg) { - if (!Node.getBeginLoc().isMacroID()) - return false; - - const SourceManager &SM = Finder->getASTContext().getSourceManager(); - if (!SM.isMacroArgExpansion(Node.getRHS()->IgnoreParenCasts()->getBeginLoc())) - return false; - - const LangOptions &Opts = Finder->getASTContext().getLangOpts(); - SourceLocation LocStart = Node.getBeginLoc(); - while (LocStart.isMacroID()) { - SourceLocation Invocation = SM.getImmediateMacroCallerLoc(LocStart); - Token Tok; - if (!Lexer::getRawToken(SM.getSpellingLoc(Invocation), Tok, SM, Opts, - /*IgnoreWhiteSpace=*/true)) { - if (Tok.getKind() == tok::raw_identifier && - Tok.getRawIdentifier() == "TEMP_FAILURE_RETRY") - return true; - } +ComparisonInTempFailureRetryCheck::ComparisonInTempFailureRetryCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + RawRetryList(Options.get("RetryMacros", "TEMP_FAILURE_RETRY")) { + StringRef(RawRetryList).split(RetryMacros, ",", -1, false); +} - LocStart = Invocation; - } - return false; +void ComparisonInTempFailureRetryCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "RetryMacros", RawRetryList); } -} // namespace void ComparisonInTempFailureRetryCheck::registerMatchers(MatchFinder *Finder) { // Both glibc's and Bionic's TEMP_FAILURE_RETRY macros structurally look like: @@ -63,15 +48,43 @@ void ComparisonInTempFailureRetryCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher( binaryOperator(hasOperatorName("="), hasRHS(ignoringParenCasts( - binaryOperator(isComparisonOperator()).bind("binop"))), - isRHSATempFailureRetryArg()), + binaryOperator(isComparisonOperator()).bind("inner")))) + .bind("outer"), this); } void ComparisonInTempFailureRetryCheck::check( const MatchFinder::MatchResult &Result) { - const auto &BinOp = *Result.Nodes.getNodeAs("binop"); - diag(BinOp.getOperatorLoc(), "top-level comparison in TEMP_FAILURE_RETRY"); + StringRef RetryMacroName; + const auto &Node = *Result.Nodes.getNodeAs("outer"); + if (!Node.getBeginLoc().isMacroID()) + return; + + const SourceManager &SM = *Result.SourceManager; + if (!SM.isMacroArgExpansion(Node.getRHS()->IgnoreParenCasts()->getBeginLoc())) + return; + + const LangOptions &Opts = Result.Context->getLangOpts(); + SourceLocation LocStart = Node.getBeginLoc(); + while (LocStart.isMacroID()) { + SourceLocation Invocation = SM.getImmediateMacroCallerLoc(LocStart); + Token Tok; + if (!Lexer::getRawToken(SM.getSpellingLoc(Invocation), Tok, SM, Opts, + /*IgnoreWhiteSpace=*/true)) { + if (Tok.getKind() == tok::raw_identifier && + llvm::is_contained(RetryMacros, Tok.getRawIdentifier())) { + RetryMacroName = Tok.getRawIdentifier(); + break; + } + } + + LocStart = Invocation; + } + if (RetryMacroName.empty()) + return; + + const auto &Inner = *Result.Nodes.getNodeAs("inner"); + diag(Inner.getOperatorLoc(), "top-level comparison in %0") << RetryMacroName; // FIXME: FixIts would be nice, but potentially nontrivial when nested macros // happen, e.g. `TEMP_FAILURE_RETRY(IS_ZERO(foo()))` diff --git a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h index d12c999720707..7b000ab2f54f6 100644 --- a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h +++ b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h @@ -10,6 +10,9 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ANDROID_COMPARISONINTEMPFAILURERETRYCHECK_H #include "../ClangTidyCheck.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include namespace clang { namespace tidy { @@ -22,10 +25,14 @@ namespace android { /// TEMP_FAILURE_RETRY is a macro provided by both glibc and Bionic. class ComparisonInTempFailureRetryCheck : public ClangTidyCheck { public: - ComparisonInTempFailureRetryCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + ComparisonInTempFailureRetryCheck(StringRef Name, ClangTidyContext *Context); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + const std::string RawRetryList; + SmallVector RetryMacros; }; } // namespace android diff --git a/clang-tools-extra/docs/clang-tidy/checks/android-comparison-in-temp-failure-retry.rst b/clang-tools-extra/docs/clang-tidy/checks/android-comparison-in-temp-failure-retry.rst index e4de4b04351d7..93112ee2bea64 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/android-comparison-in-temp-failure-retry.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/android-comparison-in-temp-failure-retry.rst @@ -34,3 +34,10 @@ If you encounter this, the fix is simple: lift the comparison out of the while (TEMP_FAILURE_RETRY(read(STDIN_FILENO, cs, sizeof(cs))) != 0) { // Do something with cs. } + +Options +------- + +.. option:: RetryMacros + + A comma-separated list of the names of retry macros to be checked. diff --git a/clang-tools-extra/test/clang-tidy/checkers/android-comparison-in-temp-failure-retry-custom-macro.c b/clang-tools-extra/test/clang-tidy/checkers/android-comparison-in-temp-failure-retry-custom-macro.c new file mode 100644 index 0000000000000..dde03ddabbcb0 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/android-comparison-in-temp-failure-retry-custom-macro.c @@ -0,0 +1,46 @@ +// RUN: %check_clang_tidy %s android-comparison-in-temp-failure-retry %t -- -config="{CheckOptions: [{key: android-comparison-in-temp-failure-retry.RetryMacros, value: 'MY_TEMP_FAILURE_RETRY,MY_OTHER_TEMP_FAILURE_RETRY'}]}" + +#define MY_TEMP_FAILURE_RETRY(x) \ + ({ \ + typeof(x) __z; \ + do \ + __z = (x); \ + while (__z == -1); \ + __z; \ + }) + +#define MY_OTHER_TEMP_FAILURE_RETRY(x) \ + ({ \ + typeof(x) __z; \ + do \ + __z = (x); \ + while (__z == -1); \ + __z; \ + }) + +int foo(); +int bar(int a); + +void with_custom_macro() { + MY_TEMP_FAILURE_RETRY(foo()); + MY_TEMP_FAILURE_RETRY(foo() == 1); + // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: top-level comparison in MY_TEMP_FAILURE_RETRY + MY_TEMP_FAILURE_RETRY((foo())); + MY_TEMP_FAILURE_RETRY((int)(foo() == 1)); + // CHECK-MESSAGES: :[[@LINE-1]]:37: warning: top-level comparison in MY_TEMP_FAILURE_RETRY + MY_TEMP_FAILURE_RETRY((bar(foo() == 1))); + MY_TEMP_FAILURE_RETRY((int)((bar(foo() == 1)) == 1)); + // CHECK-MESSAGES: :[[@LINE-1]]:49: warning: top-level comparison in MY_TEMP_FAILURE_RETRY +} + +void with_other_custom_macro() { + MY_OTHER_TEMP_FAILURE_RETRY(foo()); + MY_OTHER_TEMP_FAILURE_RETRY(foo() == 1); + // CHECK-MESSAGES: :[[@LINE-1]]:37: warning: top-level comparison in MY_OTHER_TEMP_FAILURE_RETRY + MY_OTHER_TEMP_FAILURE_RETRY((foo())); + MY_OTHER_TEMP_FAILURE_RETRY((int)(foo() == 1)); + // CHECK-MESSAGES: :[[@LINE-1]]:43: warning: top-level comparison in MY_OTHER_TEMP_FAILURE_RETRY + MY_OTHER_TEMP_FAILURE_RETRY((bar(foo() == 1))); + MY_OTHER_TEMP_FAILURE_RETRY((int)((bar(foo() == 1)) == 1)); + // CHECK-MESSAGES: :[[@LINE-1]]:55: warning: top-level comparison in MY_OTHER_TEMP_FAILURE_RETRY +} From df6de2222c66c5a1c62da0b10c35de432ddc270a Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 09:44:09 -0700 Subject: [PATCH 292/544] [flang] Fix INQUIRE of access and formatting possibilities Don't give false positives from INQUIRE about possible access mode changes on connected units. DIRECT and SEQUENTIAL cannot be intermixed, apart from allowing DIRECT on a SEQUENTIAL file with fixed-size records and positioning. Nor can FORMATTED and UNFORMATTED be interchanged. On unconnected files, the best that we can do is "UNKNOWN". Differential revision: https://reviews.llvm.org/D88673 --- flang/runtime/io-stmt.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 2a7d552dacd8b..45b5f2a95060d 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -820,7 +820,10 @@ bool InquireUnitState::Inquire( } break; case HashInquiryKeyword("DIRECT"): - str = unit().mayPosition() ? "YES" : "NO"; + str = unit().access == Access::Direct || + (unit().mayPosition() && unit().isFixedRecordLength) + ? "YES" + : "NO"; break; case HashInquiryKeyword("ENCODING"): str = unit().isUnformatted ? "UNDEFINED" @@ -831,7 +834,7 @@ bool InquireUnitState::Inquire( str = unit().isUnformatted ? "UNFORMATTED" : "FORMATTED"; break; case HashInquiryKeyword("FORMATTED"): - str = "YES"; + str = !unit().isUnformatted ? "YES" : "NO"; break; case HashInquiryKeyword("NAME"): str = unit().path(); @@ -887,7 +890,9 @@ bool InquireUnitState::Inquire( } break; case HashInquiryKeyword("SEQUENTIAL"): - str = "YES"; + // "NO" for Direct, since Sequential would not work if + // the unit were reopened without RECL=. + str = unit().access == Access::Sequential ? "YES" : "NO"; break; case HashInquiryKeyword("SIGN"): str = unit().isUnformatted ? "UNDEFINED" @@ -895,13 +900,13 @@ bool InquireUnitState::Inquire( : "SUPPRESS"; break; case HashInquiryKeyword("STREAM"): - str = "YES"; + str = unit().access == Access::Stream ? "YES" : "NO"; break; case HashInquiryKeyword("WRITE"): str = unit().mayWrite() ? "YES" : "NO"; break; case HashInquiryKeyword("UNFORMATTED"): - str = "YES"; + str = unit().isUnformatted ? "YES" : "NO"; break; } if (str) { @@ -1090,6 +1095,10 @@ bool InquireUnconnectedFileState::Inquire( break; case HashInquiryKeyword("DIRECT"): case HashInquiryKeyword("ENCODING"): + case HashInquiryKeyword("FORMATTED"): + case HashInquiryKeyword("SEQUENTIAL"): + case HashInquiryKeyword("STREAM"): + case HashInquiryKeyword("UNFORMATTED"): str = "UNKNONN"; break; case HashInquiryKeyword("READ"): @@ -1101,12 +1110,6 @@ bool InquireUnconnectedFileState::Inquire( case HashInquiryKeyword("WRITE"): str = MayWrite(path_.get()) ? "YES" : "NO"; break; - case HashInquiryKeyword("FORMATTED"): - case HashInquiryKeyword("SEQUENTIAL"): - case HashInquiryKeyword("STREAM"): - case HashInquiryKeyword("UNFORMATTED"): - str = "YES"; - break; case HashInquiryKeyword("NAME"): str = path_.get(); return true; From 8654a0f8bbf3e28eb210c75c8c70c739de637226 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 1 Oct 2020 13:30:46 -0400 Subject: [PATCH 293/544] [libc++] Don't re-export new/delete from libc++abi when they are defined in libc++ This is a temporary workaround until the new/delete situation is made better (i.e. we don't include new/delete in both libc++ and libc++abi by default). --- libcxx/src/CMakeLists.txt | 5 +++++ libcxxabi/src/CMakeLists.txt | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index fc9fc0e7bc27d..0e6819369ffa1 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -212,6 +212,11 @@ if (LIBCXX_ENABLE_SHARED) "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi.v${LIBCXX_LIBCPPABI_VERSION}.exp" "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp" "-Wl,-force_symbols_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/weak.exp") + + if (NOT LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS) + target_link_libraries(cxx_shared PRIVATE + "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../../libcxxabi/lib/new-delete.exp") + endif() endif() # Generate a linker script in place of a libc++.so symlink. diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index e9e454082a054..c57d6fa83aa0f 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -215,7 +215,7 @@ if (LIBCXXABI_ENABLE_SHARED) export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/itanium-base.exp") if (LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS) - reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/new-delete.exp") + export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/new-delete.exp") endif() if (LIBCXXABI_ENABLE_EXCEPTIONS) From e29c9d77f128e7ef9b2b5f8f09fb06b01a9dad3a Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 09:50:48 -0700 Subject: [PATCH 294/544] [flang] Fix WRITE after BACKSPACE A WRITE to an unformatted sequential variable-length unit after a BACKSPACE needs to forget its previous knowledge of the length of the record that's about to be overwritten, and a BACKSPACE after an ENDFILE or at the start of the file needs to be a no-op. Differential revision: https://reviews.llvm.org/D88675 --- flang/runtime/io-api.cpp | 1 + flang/runtime/unit.cpp | 44 +++++++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 304c40e871f46..edd338af0fa77 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -235,6 +235,7 @@ Cookie BeginUnformattedIO( if (unit.access == Access::Sequential && !unit.isFixedRecordLength) { // Create space for (sub)record header to be completed by // UnformattedIoStatementState::EndIoStatement() + unit.recordLength.reset(); // in case of prior BACKSPACE io.Emit("\0\0\0\0", 4); // placeholder for record length header } } diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index bcb8a478ad59f..77b7a74551d8f 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -132,16 +132,17 @@ void ExternalFileUnit::OpenUnit(OpenStatus status, std::optional action, static_cast(*totalBytes)); } } + endfileRecordNumber.reset(); + currentRecordNumber = 1; + if (totalBytes && recordLength && *recordLength) { + endfileRecordNumber = 1 + (*totalBytes / *recordLength); + } if (position == Position::Append) { - if (totalBytes && recordLength && *recordLength) { - endfileRecordNumber = 1 + (*totalBytes / *recordLength); - } else { + if (!endfileRecordNumber) { // Fake it so that we can backspace relative from the end - endfileRecordNumber = std::numeric_limits::max() - 1; + endfileRecordNumber = std::numeric_limits::max() - 2; } currentRecordNumber = *endfileRecordNumber; - } else { - currentRecordNumber = 1; } } @@ -374,7 +375,9 @@ void ExternalFileUnit::BeginReadingRecord(IoErrorHandler &handler) { void ExternalFileUnit::FinishReadingRecord(IoErrorHandler &handler) { RUNTIME_CHECK(handler, direction_ == Direction::Input && beganReadingRecord_); beganReadingRecord_ = false; - if (access == Access::Sequential) { + if (handler.GetIoStat() != IostatOk) { + // avoid bogus crashes in END/ERR circumstances + } else if (access == Access::Sequential) { RUNTIME_CHECK(handler, recordLength.has_value()); if (isFixedRecordLength) { frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; @@ -430,16 +433,22 @@ void ExternalFileUnit::BackspaceRecord(IoErrorHandler &handler) { handler.SignalError(IostatBackspaceNonSequential, "BACKSPACE(UNIT=%d) on non-sequential file", unitNumber()); } else { - DoImpliedEndfile(handler); - --currentRecordNumber; - BeginRecord(); - if (isFixedRecordLength) { - BackspaceFixedRecord(handler); - } else if (isUnformatted) { - BackspaceVariableUnformattedRecord(handler); + if (endfileRecordNumber && currentRecordNumber > *endfileRecordNumber) { + // BACKSPACE after ENDFILE } else { - BackspaceVariableFormattedRecord(handler); + DoImpliedEndfile(handler); + if (frameOffsetInFile_ + recordOffsetInFrame_ > 0) { + --currentRecordNumber; + if (isFixedRecordLength) { + BackspaceFixedRecord(handler); + } else if (isUnformatted) { + BackspaceVariableUnformattedRecord(handler); + } else { + BackspaceVariableFormattedRecord(handler); + } + } } + BeginRecord(); } } @@ -456,8 +465,12 @@ void ExternalFileUnit::Endfile(IoErrorHandler &handler) { } else if (!mayWrite()) { handler.SignalError(IostatEndfileUnwritable, "ENDFILE(UNIT=%d) on read-only file", unitNumber()); + } else if (endfileRecordNumber && + currentRecordNumber > *endfileRecordNumber) { + // ENDFILE after ENDFILE } else { DoEndfile(handler); + ++currentRecordNumber; } } @@ -469,7 +482,6 @@ void ExternalFileUnit::Rewind(IoErrorHandler &handler) { DoImpliedEndfile(handler); SetPosition(0); currentRecordNumber = 1; - // TODO: reset endfileRecordNumber? } } From 71124a9dbdcc76cd5efec8c148001a3f808bd769 Mon Sep 17 00:00:00 2001 From: Jamie Schmeiser Date: Thu, 1 Oct 2020 17:39:02 +0000 Subject: [PATCH 295/544] Reland No.3: Add new hidden option -print-changed which only reports changes to IR A new hidden option -print-changed is added along with code to support printing the IR as it passes through the opt pipeline in the new pass manager. Only those passes that change the IR are reported, with others only having the banner reported, indicating that they did not change the IR, were filtered out or ignored. Filtering of output via the -filter-print-funcs is supported and a new supporting hidden option -filter-passes is added. The latter takes a comma separated list of pass names and filters the output to only show those passes in the list that change the IR. The output can also be modified via the -print-module-scope function. The code introduces an abstract template base class that generalizes the comparison of IRs that takes an IR representation as template parameter. Derived classes provide overrides that provide an event based API for generalized reporting of IRs as they are changed in the opt pipeline through the new pass manager. The first of several instantiations is provided that prints the IR in a form similar to that produced by -print-after-all with the above mentioned filtering capabilities. This version, and the others to follow will be introduced at the upcoming developer's conference. Reviewed By: aeubanks (Arthur Eubanks), yrouban (Yevgeny Rouban), ychen (Yuanfang Chen), MaskRay (Fangrui Song) Differential Revision: https://reviews.llvm.org/D86360 --- .../llvm/Passes/StandardInstrumentations.h | 92 +++++++ llvm/lib/IR/LegacyPassManager.cpp | 4 +- llvm/lib/Passes/StandardInstrumentations.cpp | 242 +++++++++++++++++- llvm/test/Other/change-printer.ll | 128 +++++++++ 4 files changed, 455 insertions(+), 11 deletions(-) create mode 100644 llvm/test/Other/change-printer.ll diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 52850898c6b8e..9d03aeb6cec46 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -122,6 +122,97 @@ class PreservedCFGCheckerInstrumentation { void registerCallbacks(PassInstrumentationCallbacks &PIC); }; +// Base class for classes that report changes to the IR. +// It presents an interface for such classes and provides calls +// on various events as the new pass manager transforms the IR. +// It also provides filtering of information based on hidden options +// specifying which functions are interesting. +// Calls are made for the following events/queries: +// 1. The initial IR processed. +// 2. To get the representation of the IR (of type \p T). +// 3. When a pass does not change the IR. +// 4. When a pass changes the IR (given both before and after representations +// of type \p T). +// 5. When an IR is invalidated. +// 6. When a pass is run on an IR that is not interesting (based on options). +// 7. When a pass is ignored (pass manager or adapter pass). +// 8. To compare two IR representations (of type \p T). +template class ChangePrinter { +protected: + ChangePrinter() {} + +public: + virtual ~ChangePrinter(); + + // Determine if this pass/IR is interesting and if so, save the IR + // otherwise it is left on the stack without data. + void saveIRBeforePass(Any IR, StringRef PassID); + // Compare the IR from before the pass after the pass. + void handleIRAfterPass(Any IR, StringRef PassID); + // Handle the situation where a pass is invalidated. + void handleInvalidatedPass(StringRef PassID); + +protected: + // Called on the first IR processed. + virtual void handleInitialIR(Any IR) = 0; + // Called before and after a pass to get the representation of the IR. + virtual void generateIRRepresentation(Any IR, StringRef PassID, + IRUnitT &Output) = 0; + // Called when the pass is not iteresting. + virtual void omitAfter(StringRef PassID, std::string &Name) = 0; + // Called when an interesting IR has changed. + virtual void handleAfter(StringRef PassID, std::string &Name, + const IRUnitT &Before, const IRUnitT &After, + Any) = 0; + // Called when an interesting pass is invalidated. + virtual void handleInvalidated(StringRef PassID) = 0; + // Called when the IR or pass is not interesting. + virtual void handleFiltered(StringRef PassID, std::string &Name) = 0; + // Called when an ignored pass is encountered. + virtual void handleIgnored(StringRef PassID, std::string &Name) = 0; + // Called to compare the before and after representations of the IR. + virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0; + + // Stack of IRs before passes. + std::vector BeforeStack; + // Is this the first IR seen? + bool InitialIR = true; +}; + +// A change printer based on the string representation of the IR as created +// by unwrapAndPrint. The string representation is stored in a std::string +// to preserve it as the IR changes in each pass. Note that the banner is +// included in this representation but it is massaged before reporting. +class IRChangePrinter : public ChangePrinter { +public: + IRChangePrinter(); + ~IRChangePrinter() override; + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +protected: + // Called on the first IR processed. + void handleInitialIR(Any IR) override; + // Called before and after a pass to get the representation of the IR. + void generateIRRepresentation(Any IR, StringRef PassID, + std::string &Output) override; + // Called when the pass is not iteresting. + void omitAfter(StringRef PassID, std::string &Name) override; + // Called when an interesting IR has changed. + void handleAfter(StringRef PassID, std::string &Name, + const std::string &Before, const std::string &After, + Any) override; + // Called when an interesting pass is invalidated. + void handleInvalidated(StringRef PassID) override; + // Called when the IR or pass is not interesting. + void handleFiltered(StringRef PassID, std::string &Name) override; + // Called when an ignored pass is encountered. + void handleIgnored(StringRef PassID, std::string &Name) override; + // Called to compare the before and after representations of the IR. + bool same(const std::string &Before, const std::string &After) override; + + raw_ostream &Out; +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -130,6 +221,7 @@ class StandardInstrumentations { TimePassesHandler TimePasses; OptNoneInstrumentation OptNone; PreservedCFGCheckerInstrumentation PreservedCFGChecker; + IRChangePrinter PrintChangedIR; public: StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {} diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 8d9ed917bb617..7f94d42d6ecde 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -87,14 +87,14 @@ static cl::opt PrintAfterAll("print-after-all", static cl::opt PrintModuleScope("print-module-scope", cl::desc("When printing IR for print-[before|after]{-all} " - "always print a module IR"), + "and change reporters, always print a module IR"), cl::init(false), cl::Hidden); static cl::list PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), cl::desc("Only print IR for functions whose name " "match this for all print-[before|after][-all] " - "options"), + "and change reporter options"), cl::CommaSeparated, cl::Hidden); /// This is a helper to determine whether to print IR before or diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 2ee373b912be0..d2ef2cd4ed61e 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include #include using namespace llvm; @@ -51,18 +52,48 @@ static cl::opt cl::desc("Print all pass management debugging information. " "`-debug-pass-manager` must also be specified")); +// An option that prints out the IR after passes, similar to +// -print-after-all except that it only prints the IR after passes that +// change the IR. Those passes that do not make changes to the IR are +// reported as not making any changes. In addition, the initial IR is +// also reported. Other hidden options affect the output from this +// option. -filter-passes will limit the output to the named passes +// that actually change the IR and other passes are reported as filtered out. +// The specified passes will either be reported as making no changes (with +// no IR reported) or the changed IR will be reported. Also, the +// -filter-print-funcs and -print-module-scope options will do similar +// filtering based on function name, reporting changed IRs as functions(or +// modules if -print-module-scope is specified) for a particular function +// or indicating that the IR has been filtered out. The extra options +// can be combined, allowing only changed IRs for certain passes on certain +// functions to be reported in different formats, with the rest being +// reported as filtered out. +static cl::opt PrintChanged("print-changed", + cl::desc("Print changed IRs"), + cl::init(false), cl::Hidden); +// An option that supports the -print-changed option. See +// the description for -print-changed for an explanation of the use +// of this option. Note that this option has no effect without -print-changed. +static cl::list + PrintPassesList("filter-passes", cl::value_desc("pass names"), + cl::desc("Only consider IR changes for passes whose names " + "match for the print-changed option"), + cl::CommaSeparated, cl::Hidden); + namespace { /// Extracting Module out of \p IR unit. Also fills a textual description /// of \p IR for use in header when printing. -Optional> unwrapModule(Any IR) { +Optional> +unwrapModule(Any IR, bool Force = false) { if (any_isa(IR)) return std::make_pair(any_cast(IR), std::string()); if (any_isa(IR)) { const Function *F = any_cast(IR); - if (!llvm::isFunctionInPrintList(F->getName())) + if (!Force && !llvm::isFunctionInPrintList(F->getName())) return None; + const Module *M = F->getParent(); return std::make_pair(M, formatv(" (function: {0})", F->getName()).str()); } @@ -71,18 +102,19 @@ Optional> unwrapModule(Any IR) { const LazyCallGraph::SCC *C = any_cast(IR); for (const LazyCallGraph::Node &N : *C) { const Function &F = N.getFunction(); - if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) { + if (Force || (!F.isDeclaration() && isFunctionInPrintList(F.getName()))) { const Module *M = F.getParent(); return std::make_pair(M, formatv(" (scc: {0})", C->getName()).str()); } } + assert(!Force && "Expected to have made a pair when forced."); return None; } if (any_isa(IR)) { const Loop *L = any_cast(IR); const Function *F = L->getHeader()->getParent(); - if (!isFunctionInPrintList(F->getName())) + if (!Force && !isFunctionInPrintList(F->getName())) return None; const Module *M = F->getParent(); std::string LoopName; @@ -107,7 +139,8 @@ void printIR(raw_ostream &OS, const Function *F, StringRef Banner, } void printIR(raw_ostream &OS, const Module *M, StringRef Banner, - StringRef Extra = StringRef(), bool Brief = false) { + StringRef Extra = StringRef(), bool Brief = false, + bool ShouldPreserveUseListOrder = false) { if (Brief) { OS << M->getName() << '\n'; return; @@ -115,7 +148,7 @@ void printIR(raw_ostream &OS, const Module *M, StringRef Banner, if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) { OS << Banner << Extra << "\n"; - M->print(OS, nullptr, false); + M->print(OS, nullptr, ShouldPreserveUseListOrder); } else { for (const auto &F : M->functions()) { printIR(OS, &F, Banner, Extra); @@ -159,17 +192,19 @@ void printIR(raw_ostream &OS, const Loop *L, StringRef Banner, /// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into /// llvm::Any and does actual print job. void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, - bool ForceModule = false, bool Brief = false) { + bool ForceModule = false, bool Brief = false, + bool ShouldPreserveUseListOrder = false) { if (ForceModule) { if (auto UnwrappedModule = unwrapModule(IR)) - printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second); + printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second, + Brief, ShouldPreserveUseListOrder); return; } if (any_isa(IR)) { const Module *M = any_cast(IR); assert(M && "module should be valid for printing"); - printIR(OS, M, Banner, "", Brief); + printIR(OS, M, Banner, "", Brief, ShouldPreserveUseListOrder); return; } @@ -197,8 +232,196 @@ void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, llvm_unreachable("Unknown wrapped IR type"); } +// Return true when this is a pass for which changes should be ignored +inline bool isIgnored(StringRef PassID) { + return isSpecialPass(PassID, + {"PassManager", "PassAdaptor", "AnalysisManagerProxy"}); +} + +// Return true when this is a defined function for which printing +// of changes is desired. +inline bool isInterestingFunction(const Function &F) { + return llvm::isFunctionInPrintList(F.getName()); +} + +// Return true when this is a pass for which printing of changes is desired. +inline bool isInterestingPass(StringRef PassID) { + if (isIgnored(PassID)) + return false; + + static std::unordered_set PrintPassNames(PrintPassesList.begin(), + PrintPassesList.end()); + return PrintPassNames.empty() || PrintPassNames.count(PassID.str()); +} + +// Return true when this is a pass on IR for which printing +// of changes is desired. +bool isInteresting(Any IR, StringRef PassID) { + if (!isInterestingPass(PassID)) + return false; + if (any_isa(IR)) + return isInterestingFunction(*any_cast(IR)); + return true; +} + } // namespace +template +void ChangePrinter::saveIRBeforePass(Any IR, StringRef PassID) { + // Always need to place something on the stack because invalidated passes + // are not given the IR so it cannot be determined whether the pass was for + // something that was filtered out. + BeforeStack.emplace_back(); + + if (!isInteresting(IR, PassID)) + return; + // Is this the initial IR? + if (InitialIR) { + InitialIR = false; + handleInitialIR(IR); + } + + // Save the IR representation on the stack. + IRUnitT &Data = BeforeStack.back(); + generateIRRepresentation(IR, PassID, Data); +} + +template +void ChangePrinter::handleIRAfterPass(Any IR, StringRef PassID) { + assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); + std::string Name; + + // unwrapModule has inconsistent handling of names for function IRs. + if (any_isa(IR)) { + const Function *F = any_cast(IR); + Name = formatv(" (function: {0})", F->getName()).str(); + } else { + if (auto UM = unwrapModule(IR)) + Name = UM->second; + } + if (Name.empty()) + Name = " (module)"; + + if (isIgnored(PassID)) + handleIgnored(PassID, Name); + else if (!isInteresting(IR, PassID)) + handleFiltered(PassID, Name); + else { + // Get the before rep from the stack + IRUnitT &Before = BeforeStack.back(); + // Create the after rep + IRUnitT After; + generateIRRepresentation(IR, PassID, After); + + // Was there a change in IR? + if (same(Before, After)) + omitAfter(PassID, Name); + else + handleAfter(PassID, Name, Before, After, IR); + } + BeforeStack.pop_back(); +} + +template +void ChangePrinter::handleInvalidatedPass(StringRef PassID) { + assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); + + // Always flag it as invalidated as we cannot determine when + // a pass for a filtered function is invalidated since we do not + // get the IR in the call. Also, the output is just alternate + // forms of the banner anyway. + handleInvalidated(PassID); + BeforeStack.pop_back(); +} + +template ChangePrinter::~ChangePrinter() { + assert(BeforeStack.empty() && "Problem with Change Printer stack."); +} + +IRChangePrinter::IRChangePrinter() : Out(dbgs()) {} + +IRChangePrinter::~IRChangePrinter() {} + +void IRChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (!PrintChanged) + return; + + PIC.registerBeforePassCallback([this](StringRef P, Any IR) { + saveIRBeforePass(IR, P); + return true; + }); + + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + handleIRAfterPass(IR, P); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &) { + handleInvalidatedPass(P); + }); +} + +void IRChangePrinter::handleInitialIR(Any IR) { + // Always print the module. + // Unwrap and print directly to avoid filtering problems in general routines. + auto UnwrappedModule = unwrapModule(IR, /*Force=*/true); + assert(UnwrappedModule && "Expected module to be unwrapped when forced."); + Out << "*** IR Dump At Start: ***" << UnwrappedModule->second << "\n"; + UnwrappedModule->first->print(Out, nullptr, + /*ShouldPreserveUseListOrder=*/true); +} + +void IRChangePrinter::generateIRRepresentation(Any IR, StringRef PassID, + std::string &Output) { + raw_string_ostream OS(Output); + // use the after banner for all cases so it will match + SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID); + unwrapAndPrint(OS, IR, Banner, llvm::forcePrintModuleIR(), + /*Brief=*/false, /*ShouldPreserveUseListOrder=*/true); + OS.str(); +} + +void IRChangePrinter::omitAfter(StringRef PassID, std::string &Name) { + Out << formatv("*** IR Dump After {0}{1} omitted because no change ***\n", + PassID, Name); +} + +void IRChangePrinter::handleAfter(StringRef PassID, std::string &Name, + const std::string &Before, + const std::string &After, Any) { + assert(After.find("*** IR Dump") == 0 && "Unexpected banner format."); + StringRef AfterRef = After; + StringRef Banner = + AfterRef.take_until([](char C) -> bool { return C == '\n'; }); + Out << Banner; + + // LazyCallGraph::SCC already has "(scc:..." in banner so only add + // in the name if it isn't already there. + if (Name.substr(0, 6) != " (scc:" && !llvm::forcePrintModuleIR()) + Out << Name; + + Out << After.substr(Banner.size()); +} + +void IRChangePrinter::handleInvalidated(StringRef PassID) { + Out << formatv("*** IR Pass {0} invalidated ***\n", PassID); +} + +void IRChangePrinter::handleFiltered(StringRef PassID, std::string &Name) { + SmallString<20> Banner = + formatv("*** IR Dump After {0}{1} filtered out ***\n", PassID, Name); + Out << Banner; +} + +void IRChangePrinter::handleIgnored(StringRef PassID, std::string &Name) { + Out << formatv("*** IR Pass {0}{1} ignored ***\n", PassID, Name); +} + +bool IRChangePrinter::same(const std::string &Before, + const std::string &After) { + return Before == After; +} + PrintIRInstrumentation::~PrintIRInstrumentation() { assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit"); } @@ -508,4 +731,5 @@ void StandardInstrumentations::registerCallbacks( TimePasses.registerCallbacks(PIC); OptNone.registerCallbacks(PIC); PreservedCFGChecker.registerCallbacks(PIC); + PrintChangedIR.registerCallbacks(PIC); } diff --git a/llvm/test/Other/change-printer.ll b/llvm/test/Other/change-printer.ll new file mode 100644 index 0000000000000..7e3f0046ef79a --- /dev/null +++ b/llvm/test/Other/change-printer.ll @@ -0,0 +1,128 @@ +; Simple checks of -print-changed functionality +; +; Note that (mostly) only the banners are checked. +; +; Simple functionality check. +; RUN: opt -S -print-changed -passes=instsimplify 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-SIMPLE +; +; Check that only the passes that change the IR are printed and that the +; others (including g) are filtered out. +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FUNC-FILTER +; +; Check that the reporting of IRs respects -print-module-scope +; RUN: opt -S -print-changed -passes=instsimplify -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-PRINT-MOD-SCOPE +; +; Check that the reporting of IRs respects -print-module-scope +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FUNC-FILTER-MOD-SCOPE +; +; Check that reporting of multiple functions happens +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs="f,g" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FILTER-MULT-FUNC +; +; Check that the reporting of IRs respects -filter-passes +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FILTER-PASSES +; +; Check that the reporting of IRs respects -filter-passes with multiple passes +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FILTER-MULT-PASSES +; +; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FILTER-FUNC-PASSES +; +; Check that the reporting of IRs respects -filter-passes, -filter-print-funcs and -print-module-scope +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-FILTER-FUNC-PASSES-MOD-SCOPE +; +; Check that repeated passes that change the IR are printed and that the +; others (including g) are filtered out. Note that the second time +; instsimplify is run on f, it does not change the IR +; RUN: opt -S -print-changed -passes="instsimplify,instsimplify" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK-MULT-PASSES-FILTER-FUNC + +define i32 @g() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +define i32 @f() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +; CHECK-SIMPLE: *** IR Dump At Start: *** +; CHECK-SIMPLE-NEXT: ; ModuleID = {{.+}} +; CHECK-SIMPLE: *** IR Dump After VerifierPass (module) omitted because no change *** +; CHECK-SIMPLE: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK-SIMPLE-NEXT: define i32 @g() +; CHECK-SIMPLE: *** IR Pass PassManager{{.*}} (function: g) ignored *** +; CHECK-SIMPLE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-SIMPLE-NEXT: define i32 @f() +; CHECK-SIMPLE: *** IR Pass PassManager{{.*}} (function: f) ignored *** +; CHECK-SIMPLE: *** IR Pass ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> (module) ignored *** +; CHECK-SIMPLE: *** IR Dump After VerifierPass (module) omitted because no change *** +; CHECK-SIMPLE: *** IR Dump After PrintModulePass (module) omitted because no change *** +; CHECK-SIMPLE-NOT: *** IR + +; CHECK-FUNC-FILTER: *** IR Dump At Start: *** +; CHECK-FUNC-FILTER-NEXT: ; ModuleID = {{.+}} +; CHECK-FUNC-FILTER: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-FUNC-FILTER: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-FUNC-FILTER-NEXT: define i32 @f() + +; CHECK-PRINT-MOD-SCOPE: *** IR Dump At Start: *** +; CHECK-PRINT-MOD-SCOPE-NEXT: ModuleID = {{.+}} +; CHECK-PRINT-MOD-SCOPE: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK-PRINT-MOD-SCOPE-NEXT: ModuleID = {{.+}} +; CHECK-PRINT-MOD-SCOPE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-PRINT-MOD-SCOPE-NEXT: ModuleID = {{.+}} + +; CHECK-FUNC-FILTER-MOD-SCOPE: *** IR Dump At Start: *** +; CHECK-FUNC-FILTER-MOD-SCOPE-NEXT: ; ModuleID = {{.+}} +; CHECK-FUNC-FILTER-MOD-SCOPE: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-FUNC-FILTER-MOD-SCOPE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-FUNC-FILTER-MOD-SCOPE-NEXT: ModuleID = {{.+}} + +; CHECK-FILTER-MULT-FUNC: *** IR Dump At Start: *** +; CHECK-FILTER-MULT-FUNC-NEXT: ; ModuleID = {{.+}} +; CHECK-FILTER-MULT-FUNC: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK-FILTER-MULT-FUNC-NEXT: define i32 @g() +; CHECK-FILTER-MULT-FUNC: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-FILTER-MULT-FUNC-NEXT: define i32 @f() + +; CHECK-FILTER-PASSES: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-FILTER-PASSES: *** IR Dump At Start: *** (function: g) +; CHECK-FILTER-PASSES-NEXT: ; ModuleID = {{.+}} +; CHECK-FILTER-PASSES: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** +; CHECK-FILTER-PASSES: *** IR Dump After InstSimplifyPass (function: f) filtered out *** +; CHECK-FILTER-PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK-FILTER-MULT-PASSES: *** IR Dump At Start: *** (function: g) +; CHECK-FILTER-MULT-PASSES-NEXT: ; ModuleID = {{.+}} +; CHECK-FILTER-MULT-PASSES: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK-FILTER-MULT-PASSES-NEXT: define i32 @g() +; CHECK-FILTER-MULT-PASSES: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** +; CHECK-FILTER-MULT-PASSES: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-FILTER-MULT-PASSES-NEXT: define i32 @f() +; CHECK-FILTER-MULT-PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK-FILTER-FUNC-PASSES: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-FILTER-FUNC-PASSES: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** +; CHECK-FILTER-FUNC-PASSES: *** IR Dump At Start: *** (function: f) +; CHECK-FILTER-FUNC-PASSES-NEXT: ; ModuleID = {{.+}} +; CHECK-FILTER-FUNC-PASSES: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-FILTER-FUNC-PASSES-NEXT: define i32 @f() +; CHECK-FILTER-FUNC-PASSES: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE: *** IR Dump At Start: *** (function: f) +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE-NEXT: ; ModuleID = {{.+}} +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE-NEXT: ModuleID = {{.+}} +; CHECK-FILTER-FUNC-PASSES-MOD-SCOPE: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK-MULT-PASSES-FILTER-FUNC: *** IR Dump At Start: *** +; CHECK-MULT-PASSES-FILTER-FUNC-NEXT: ; ModuleID = {{.+}} +; CHECK-MULT-PASSES-FILTER-FUNC: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-MULT-PASSES-FILTER-FUNC: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK-MULT-PASSES-FILTER-FUNC: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK-MULT-PASSES-FILTER-FUNC-NEXT: define i32 @f() +; CHECK-MULT-PASSES-FILTER-FUNC: *** IR Dump After InstSimplifyPass (function: f) omitted because no change *** From a0119e56751c16e3104d6bd760bb1c114a79bce7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 1 Oct 2020 13:39:58 -0400 Subject: [PATCH 296/544] [libc++] NFC: Add missing SHA to ABI Changelog --- libcxx/lib/abi/CHANGELOG.TXT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/lib/abi/CHANGELOG.TXT b/libcxx/lib/abi/CHANGELOG.TXT index 7ed2b7e28d407..1720e86efb842 100644 --- a/libcxx/lib/abi/CHANGELOG.TXT +++ b/libcxx/lib/abi/CHANGELOG.TXT @@ -16,7 +16,7 @@ New entries should be added directly below the "Version" header. Version 12.0 ------------ -* XXXXXXX - [libc++] Simplify how we re-export symbols from libc++abi +* 4f13b9992971 - [libc++] Simplify how we re-export symbols from libc++abi We re-export some symbols that were exported from libc++abi but not from libc++. Exporting new symbols is not an ABI break. From 6c25816d7b68e794a04ba0d7659178ab17252637 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Thu, 1 Oct 2020 10:40:03 -0700 Subject: [PATCH 297/544] [DSE] Look through memory PHI arguments when removing noop stores in MSSA. Summary: Adds support for "following" memory through MSSA PHI arguments. This will help catch more noop stores that exist between blocks. Originally part of D79391. Reviewers: fhahn, jfb, asbirlea Differential Revision: https://reviews.llvm.org/D82588 --- .../Scalar/DeadStoreElimination.cpp | 40 +++++++- .../DeadStoreElimination/MSSA/noop-stores.ll | 93 ++++++++++++++++++- .../DeadStoreElimination/MSSA/simple-todo.ll | 25 ----- 3 files changed, 128 insertions(+), 30 deletions(-) delete mode 100644 llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index d36fb4439ecc5..c4743c22daac1 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -2404,10 +2404,44 @@ struct DSEState { if (auto *LoadI = dyn_cast(Store->getOperand(0))) { if (LoadI->getPointerOperand() == Store->getOperand(1)) { + // Get the defining access for the load. auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess(); - // If both accesses share the same defining access, no instructions - // between them can modify the memory location. - return LoadAccess == Def->getDefiningAccess(); + // Fast path: the defining accesses are the same. + if (LoadAccess == Def->getDefiningAccess()) + return true; + + // Look through phi accesses. Recursively scan all phi accesses by + // adding them to a worklist. Bail when we run into a memory def that + // does not match LoadAccess. + SetVector ToCheck; + MemoryAccess *Current = Def->getDefiningAccess(); + // We don't want to bail when we run into the store memory def. But, + // the phi access may point to it. So, pretend like we've already + // checked it. + ToCheck.insert(Def); + ToCheck.insert(Current); + // Start at current (1) to simulate already having checked Def. + for (unsigned I = 1; I < ToCheck.size(); ++I) { + Current = ToCheck[I]; + if (auto PhiAccess = dyn_cast(Current)) { + // Check all the operands. + for (auto &Use : PhiAccess->incoming_values()) + ToCheck.insert(cast(&Use)); + continue; + } + + // If we found a memory def, bail. This happens when we have an + // unrelated write in between an otherwise noop store. + assert(isa(Current) && + "Only MemoryDefs should reach here."); + // TODO: Skip no alias MemoryDefs that have no aliasing reads. + // We are searching for the definition of the store's destination. + // So, if that is the same definition as the load, then this is a + // noop. Otherwise, fail. + if (LoadAccess != Current) + return false; + } + return true; } } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll index 6a9c4b80b3ddf..982bd3bdc5403 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll @@ -101,6 +101,47 @@ bb3: ret i32 0 } +; Remove redundant store if loaded value is in another block inside a loop. +define i32 @test31(i1 %c, i32* %p, i32 %i) { +; CHECK-LABEL: @test31( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br label %bb1 +bb1: + store i32 %v, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb2: + ret i32 0 +} + +; Don't remove "redundant" store if %p is possibly stored to. +define i32 @test46(i1 %c, i32* %p, i32* %p2, i32 %i) { +; CHECK-LABEL: @test46( +; CHECK: load +; CHECK: store +; CHECK: store +; CHECK: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br label %bb1 +bb1: + store i32 %v, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb2: + store i32 0, i32* %p2, align 4 + br i1 %c, label %bb3, label %bb1 +bb3: + ret i32 0 +} + declare void @unknown_func() ; Remove redundant store, which is in the lame loop as the load. @@ -112,7 +153,7 @@ define i32 @test33(i1 %c, i32* %p, i32 %i) { ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1]], label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: ret i32 0 ; @@ -125,7 +166,7 @@ bb2: store i32 %v, i32* %p, align 4 ; Might read and overwrite value at %p, but doesn't matter. call void @unknown_func() - br i1 undef, label %bb1, label %bb3 + br i1 %c, label %bb1, label %bb3 bb3: ret i32 0 } @@ -168,4 +209,52 @@ define void @test45(i32* %Q) { ret void } +define i32 @test48(i1 %c, i32* %p) { +; CHECK-LABEL: @test48( +; CHECK: entry: +; CHECK-NEXT: [[V:%.*]] = load +; CHECK: store i32 0 +; CHECK: store i32 [[V]] +; CHECK: ret i32 0 +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb0, label %bb0.0 + +bb0: + store i32 0, i32* %p + br i1 %c, label %bb1, label %bb2 + +bb0.0: + br label %bb1 + +bb1: + store i32 %v, i32* %p, align 4 + br i1 %c, label %bb2, label %bb0 +bb2: + ret i32 0 +} + +; TODO: Remove both redundant stores if loaded value is in another block inside a loop. +define i32 @test47(i1 %c, i32* %p, i32 %i) { +; X-CHECK-LABEL: @test47( +; X-CHECK-NEXT: entry: +; X-CHECK-NEXT: br label [[BB1:%.*]] +; X-CHECK: bb1: +; X-CHECK-NEXT: br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]] +; X-CHECK: bb2: +; X-CHECK-NEXT: br i1 [[C]], label [[BB2]], label [[BB3:%.*]] +; X-CHECK: bb3: +; X-CHECK-NEXT: ret i32 0 +entry: + %v = load i32, i32* %p, align 4 + br label %bb1 +bb1: + store i32 %v, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb2: + store i32 %v, i32* %p, align 4 + br i1 %c, label %bb3, label %bb1 +bb3: + ret i32 0 +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll deleted file mode 100644 index a4d3127d25f3d..0000000000000 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ /dev/null @@ -1,25 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -; Remove redundant store if loaded value is in another block inside a loop. -define i32 @test31(i1 %c, i32* %p, i32 %i) { -; CHECK-LABEL: @test31( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]] -; CHECK: bb2: -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br label %bb1 -bb1: - store i32 %v, i32* %p, align 4 - br i1 undef, label %bb1, label %bb2 -bb2: - ret i32 0 -} From 114e964dce9f18e8f3c25a3a4136e59ead9ae50c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 1 Oct 2020 11:58:01 -0400 Subject: [PATCH 298/544] [InstCombine] auto-generate complete test checks; NFC --- .../InstCombine/bitreverse-known-bits.ll | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll b/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll index cd1523a3b06ba..b8702f64dfdcf 100644 --- a/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll +++ b/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll @@ -1,11 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -instcombine | FileCheck %s declare i8 @llvm.bitreverse.i8(i8) declare i32 @llvm.bitreverse.i32(i32) -; CHECK-LABEL: @test1 -; CHECK: ret i1 true define i1 @test1(i32 %arg) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: ret i1 true +; %a = or i32 %arg, 4294901760 %b = call i32 @llvm.bitreverse.i32(i32 %a) %and = and i32 %b, 65535 @@ -13,9 +15,10 @@ define i1 @test1(i32 %arg) { ret i1 %res } -; CHECK-LABEL: @test2 -; CHECK: ret i1 true define i1 @test2(i32 %arg) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: ret i1 true +; %a = or i32 %arg, 1 %b = call i32 @llvm.bitreverse.i32(i32 %a) %c = and i32 %b, 2147483648 @@ -24,9 +27,10 @@ define i1 @test2(i32 %arg) { ret i1 %res } -; CHECK-LABEL: @test3 -; CHECK: ret i1 false define i1 @test3(i32 %arg) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: ret i1 false +; %a = or i32 %arg, 65536 %b = call i32 @llvm.bitreverse.i32(i32 %a) %and = and i32 %b, 32768 @@ -34,18 +38,22 @@ define i1 @test3(i32 %arg) { ret i1 %res } -; CHECK-LABEL: @add_bitreverse -; Make sure we process range metadata on bitreverse +; known bits for the bitreverse will say the result is in the range [0, 64) +; but the metadata says [0, 16). So make sure the range metadata wins. +; add %reverse, 1111 0000 +; should become +; or %reverse, 1111 0000 + define i8 @add_bitreverse(i8 %a) { +; CHECK-LABEL: @add_bitreverse( +; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], -4 +; CHECK-NEXT: [[REVERSE:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[B]]), [[RNG0:!range !.*]] +; CHECK-NEXT: [[C:%.*]] = or i8 [[REVERSE]], -16 +; CHECK-NEXT: ret i8 [[C]] +; %b = and i8 %a, 252 - ; known bits for the bitreverse will say the result is in the range [0, 64) - ; but the metadata says [0, 16). So make sure the range metadata wins. - ; add %reverse, 1111 0000 - ; should become - ; or %reverse, 1111 0000 %reverse = call i8 @llvm.bitreverse.i8(i8 %b), !range !1 %c = add i8 %reverse, -16 -; CHECK: or i8 %reverse, -16 ret i8 %c } !1 = !{i8 0, i8 16} From 686eb0d8ded9159b090c3ef7b33a422e1f05166e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 1 Oct 2020 13:45:05 -0400 Subject: [PATCH 299/544] [AST] do not error on APFloat invalidOp in default mode If FP exceptions are ignored, we should not error out of compilation just because APFloat indicated an exception. This is required as a preliminary step for D88238 which changes APFloat behavior for signaling NaN convert() to set the opInvalidOp exception status. Currently, there is no way to trigger this error because convert() never sets opInvalidOp. FP binops that set opInvalidOp also create a NaN, so the path to checkFloatingPointResult() is blocked by a different diagnostic: // [expr.pre]p4: // If during the evaluation of an expression, the result is not // mathematically defined [...], the behavior is undefined. // FIXME: C++ rules require us to not conform to IEEE 754 here. if (LHS.isNaN()) { Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << LHS.isNaN(); return Info.noteUndefinedBehavior(); } return checkFloatingPointResult(Info, E, St); Differential Revision: https://reviews.llvm.org/D88664 --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b17eed2dc823d..4460e3a17e6da 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -2439,7 +2439,8 @@ static bool checkFloatingPointResult(EvalInfo &Info, const Expr *E, return false; } - if (St & APFloat::opStatus::opInvalidOp) { + if ((St & APFloat::opStatus::opInvalidOp) && + FPO.getFPExceptionMode() != LangOptions::FPE_Ignore) { // There is no usefully definable result. Info.FFDiag(E); return false; From ba9b15072c5aa6c6d89bcb8b4f7af9d546867292 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 1 Oct 2020 13:55:39 -0400 Subject: [PATCH 300/544] [libc++][ci] Add a job to run the vanilla configuration on Apple Previously, we'd only have jobs testing the Apple cache on Apple platforms, but libc++ should also work out-of-the-box. --- libcxx/utils/ci/buildkite-pipeline.yml | 29 +++++++++++++++----------- libcxx/utils/ci/run-buildbot.sh | 28 ++++++++++++------------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index d9fb0925c6d84..63fb5d5f8f424 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -16,65 +16,70 @@ steps: - label: "C++03" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx03 | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx03 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++11" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx11 | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx11 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++14" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx14 | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx14 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++17" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx17 | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx17 | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "C++20" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-cxx2a | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx2a | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "-fno-exceptions" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-noexceptions | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-noexceptions | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "GCC/C++20" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-gcc | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-gcc | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "ASAN" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-asan | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-asan | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "TSAN" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-tsan | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-tsan | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "UBSAN" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-ubsan | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-ubsan | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "With LLVM's libunwind" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-with_llvm_unwinder | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-with_llvm_unwinder | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" - label: "Single-threaded" - command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-ubuntu-singlethreaded | libcxx/utils/ci/phabricator-report" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-singlethreaded | libcxx/utils/ci/phabricator-report" agents: queue: "libcxx-builders" + - label: "MacOS C++20" + command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh generic-cxx2a | libcxx/utils/ci/phabricator-report" + agents: + queue: "libcxx-macos-builders" + # Build with the configuration we use to generate libc++.dylib on Apple platforms - label: "Apple system" command: "set -o pipefail && libcxx/utils/ci/run-buildbot.sh x86_64-apple-system | libcxx/utils/ci/phabricator-report" diff --git a/libcxx/utils/ci/run-buildbot.sh b/libcxx/utils/ci/run-buildbot.sh index 1f4b5df731ff3..0dee6ae75737f 100755 --- a/libcxx/utils/ci/run-buildbot.sh +++ b/libcxx/utils/ci/run-buildbot.sh @@ -18,83 +18,83 @@ args+=("-DLLVM_ENABLE_PROJECTS=libcxx;libunwind;libcxxabi") args+=("-DLIBCXX_CXX_ABI=libcxxabi") case "${BUILDER}" in -x86_64-ubuntu-cxx03) +generic-cxx03) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++03") ;; -x86_64-ubuntu-cxx11) +generic-cxx11) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++11") ;; -x86_64-ubuntu-cxx14) +generic-cxx14) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++14") ;; -x86_64-ubuntu-cxx17) +generic-cxx17) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++17") ;; -x86_64-ubuntu-cxx2a) +generic-cxx2a) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param=std=c++2a") ;; -x86_64-ubuntu-noexceptions) +generic-noexceptions) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") args+=("-DLIBCXX_ENABLE_EXCEPTIONS=OFF") args+=("-DLIBCXXABI_ENABLE_EXCEPTIONS=OFF") ;; -x86_64-ubuntu-32bit) +generic-32bit) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") args+=("-DLLVM_BUILD_32_BITS=ON") ;; -x86_64-ubuntu-gcc) +generic-gcc) export CC=gcc export CXX=g++ # FIXME: Re-enable experimental testing on GCC. GCC cares about the order # in which we link -lc++experimental, which causes issues. args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported --param enable_experimental=False") ;; -x86_64-ubuntu-asan) +generic-asan) export CC=clang export CXX=clang++ args+=("-DLLVM_USE_SANITIZER=Address") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") ;; -x86_64-ubuntu-msan) +generic-msan) export CC=clang export CXX=clang++ args+=("-DLLVM_USE_SANITIZER=MemoryWithOrigins") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") ;; -x86_64-ubuntu-tsan) +generic-tsan) export CC=clang export CXX=clang++ args+=("-DLLVM_USE_SANITIZER=Thread") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") ;; -x86_64-ubuntu-ubsan) +generic-ubsan) export CC=clang export CXX=clang++ args+=("-DLLVM_USE_SANITIZER=Undefined") args+=("-DLIBCXX_ABI_UNSTABLE=ON") args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") ;; -x86_64-ubuntu-with_llvm_unwinder) +generic-with_llvm_unwinder) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") args+=("-DLIBCXXABI_USE_LLVM_UNWINDER=ON") ;; -x86_64-ubuntu-singlethreaded) +generic-singlethreaded) export CC=clang export CXX=clang++ args+=("-DLLVM_LIT_ARGS=-sv --show-unsupported") From c1b209cc61290f1ce1243470b825e0994645cb7d Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 1 Oct 2020 19:45:01 +0200 Subject: [PATCH 301/544] [Format] Don't treat compound extension headers (foo.proto.h) as foo.cc main-file header. We receive internal bugs about this false positives after D86597. Differential Revision: https://reviews.llvm.org/D88640. --- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp | 7 ++++++- clang/unittests/Format/SortIncludesTest.cpp | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index e0368975ea3ed..0cc4afa4ade6c 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -233,7 +233,12 @@ int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName, bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { if (!IncludeName.startswith("\"")) return false; - StringRef HeaderStem = matchingStem(IncludeName.drop_front(1).drop_back(1)); + + // Not matchingStem: implementation files may have compound extensions but + // headers may not. + StringRef HeaderStem = + llvm::sys::path::stem(IncludeName.drop_front(1).drop_back( + 1) /* remove the surrounding "" or <> */); if (FileStem.startswith(HeaderStem) || FileStem.startswith_lower(HeaderStem)) { llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, diff --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp index db3ed65d443b8..c327be5e6b0ba 100644 --- a/clang/unittests/Format/SortIncludesTest.cpp +++ b/clang/unittests/Format/SortIncludesTest.cpp @@ -151,6 +151,16 @@ TEST_F(SortIncludesTest, NoReplacementsForValidIncludes) { EXPECT_TRUE(sortIncludes(FmtStyle, Code, GetCodeRange(Code), "a.cc").empty()); } +TEST_F(SortIncludesTest, NoMainFileHeader) { + std::string Code = "#include \n" + "\n" + "#include \"a/extra_action.proto.h\"\n"; + FmtStyle = getGoogleStyle(FormatStyle::LK_Cpp); + EXPECT_TRUE( + sortIncludes(FmtStyle, Code, GetCodeRange(Code), "a/extra_action.cc") + .empty()); +} + TEST_F(SortIncludesTest, SortedIncludesInMultipleBlocksAreMerged) { Style.IncludeBlocks = tooling::IncludeStyle::IBS_Merge; EXPECT_EQ("#include \"a.h\"\n" From 79122868f9a3909cfd94d51e9bfe960917a1be05 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 1 Oct 2020 05:59:19 -0500 Subject: [PATCH 302/544] [LLD][PowerPC] Add support for R_PPC64_GOT_TLSGD_PCREL34 used in TLS General Dynamic Add Thread Local Storage support for the 34 bit relocation R_PPC64_GOT_TLSGD_PCREL34 used in General Dynamic. The compiler will produce code that looks like: ``` pla r3, x@got@tlsgd@pcrel R_PPC64_GOT_TLSGD_PCREL34 bl __tls_get_addr@notoc(x@tlsgd) R_PPC64_TLSGD R_PPC64_REL24_NOTOC ``` LLD should be able to correctly compute the relocation for R_PPC64_GOT_TLSGD_PCREL34 as well as do the following two relaxations where possible: General Dynamic to Local Exec: ``` paddi r3, r13, x@tprel nop ``` and General Dynamic to Initial Exec: ``` pld r3, x@got@tprel@pcrel add r3, r3, r13 ``` Note: This patch adds support for the PC Relative (no TOC) version of General Dynamic on top of the existing support for the TOC version of General Dynamic. The ABI does not provide any way to tell by looking only at the relocation `R_PPC64_TLSGD` when it is being used in a TOC instruction sequence or and when it is being used in a no TOC sequence. The TOC sequence should always be 4 byte aligned. This patch adds one to the offset of the relocation when it is being used in a no TOC sequence. In this way LLD can tell by looking at the alignment of the offset of `R_PPC64_TLSGD` whether or not it is being used as part of a TOC or no TOC sequence. Reviewed By: NeHuang, sfertile, MaskRay Differential Revision: https://reviews.llvm.org/D87318 --- lld/ELF/Arch/PPC64.cpp | 76 +++++++++++++++++++++---- lld/ELF/Relocations.cpp | 13 +++++ lld/test/ELF/ppc64-tls-pcrel-gd.s | 94 +++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 lld/test/ELF/ppc64-tls-pcrel-gd.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 06dd863f31b2a..2e7b20d46cb09 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -727,15 +727,38 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; - case R_PPC64_TLSGD: - write32(loc, NOP); - write32(loc + 4, 0x38630000); // addi r3, r3 - // Since we are relocating a half16 type relocation and Loc + 4 points to - // the start of an instruction we need to advance the buffer by an extra - // 2 bytes on BE. - relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), - R_PPC64_TPREL16_LO, val); + case R_PPC64_GOT_TLSGD_PCREL34: + // Relax from paddi r3, 0, x@got@tlsgd@pcrel, 1 to + // paddi r3, r13, x@tprel, 0 + writePrefixedInstruction(loc, 0x06000000386d0000); + relocateNoSym(loc, R_PPC64_TPREL34, val); + break; + case R_PPC64_TLSGD: { + // PC Relative Relaxation: + // Relax from bl __tls_get_addr@notoc(x@tlsgd) to + // nop + // TOC Relaxation: + // Relax from bl __tls_get_addr(x@tlsgd) + // nop + // to + // nop + // addi r3, r3, x@tprel@l + const uintptr_t locAsInt = reinterpret_cast(loc); + if (locAsInt % 4 == 0) { + write32(loc, NOP); // nop + write32(loc + 4, 0x38630000); // addi r3, r3 + // Since we are relocating a half16 type relocation and Loc + 4 points to + // the start of an instruction we need to advance the buffer by an extra + // 2 bytes on BE. + relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, val); + } else if (locAsInt % 4 == 1) { + write32(loc - 1, NOP); + } else { + errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment"); + } break; + } default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } @@ -947,6 +970,8 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSGD16_LO: return R_TLSGD_GOT; + case R_PPC64_GOT_TLSGD_PCREL34: + return R_TLSGD_PC; case R_PPC64_GOT_TLSLD16: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_GOT_TLSLD16_HI: @@ -1261,6 +1286,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { break; case R_PPC64_PCREL34: case R_PPC64_GOT_PCREL34: + case R_PPC64_GOT_TLSGD_PCREL34: case R_PPC64_GOT_TPREL_PCREL34: case R_PPC64_TPREL34: { const uint64_t si0Mask = 0x00000003ffff0000; @@ -1340,7 +1366,8 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000) return R_PPC64_RELAX_GOT_PC; } - if (expr == R_RELAX_TLS_GD_TO_IE) + + if (type != R_PPC64_GOT_TLSGD_PCREL34 && expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) return R_RELAX_TLS_LD_TO_LE_ABS; @@ -1381,10 +1408,35 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } - case R_PPC64_TLSGD: - write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop - write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 + case R_PPC64_GOT_TLSGD_PCREL34: { + // Relax from paddi r3, 0, sym@got@tlsgd@pcrel, 1 to + // pld r3, sym@got@tprel@pcrel + writePrefixedInstruction(loc, 0x04100000e4600000); + relocateNoSym(loc, R_PPC64_GOT_TPREL_PCREL34, val); + return; + } + case R_PPC64_TLSGD: { + // PC Relative Relaxation: + // Relax from bl __tls_get_addr@notoc(x@tlsgd) to + // nop + // TOC Relaxation: + // Relax from bl __tls_get_addr(x@tlsgd) + // nop + // to + // nop + // add r3, r3, r13 + const uintptr_t locAsInt = reinterpret_cast(loc); + if (locAsInt % 4 == 0) { + write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop + write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 + } else if (locAsInt % 4 == 1) { + // bl __tls_get_addr(sym@tlsgd) --> add r3, r3, r13 + write32(loc - 1, 0x7c636a14); + } else { + errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment"); + } return; + } default: llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 4c6a70d9034e9..ea6aa3c6a12a4 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1357,6 +1357,19 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (type == R_PPC64_TOC16_LO && sym.isSection() && isa(sym) && cast(sym).section->name == ".toc") ppc64noTocRelax.insert({&sym, addend}); + + if (type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) { + if (i == end) { + errorOrWarn("R_PPC64_TLSGD may not be the last relocation" + + getLocation(sec, sym, offset)); + return; + } + + // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC case, + // so we can discern it later from the toc-case. + if (i->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC) + ++offset; + } } // Relax relocations. diff --git a/lld/test/ELF/ppc64-tls-pcrel-gd.s b/lld/test/ELF/ppc64-tls-pcrel-gd.s new file mode 100644 index 0000000000000..2220d91fe798e --- /dev/null +++ b/lld/test/ELF/ppc64-tls-pcrel-gd.s @@ -0,0 +1,94 @@ +# REQUIRES: ppc +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/asm -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t-defs.o +# RUN: ld.lld --shared %t-defs.o -o %t-defs.so +# RUN: ld.lld -T %t/lds --shared %t.o -o %t-gd.so +# RUN: ld.lld -T %t/lds %t.o %t-defs.so -o %t-gdtoie +# RUN: ld.lld -T %t/lds %t.o %t-defs.o -o %t-gdtole + +# RUN: llvm-readelf -r %t-gd.so | FileCheck %s --check-prefix=GD-RELOC +# RUN: llvm-readelf -s %t-gd.so | FileCheck %s --check-prefix=GD-SYM +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gd.so | FileCheck %s --check-prefix=GD + +# RUN: llvm-readelf -r %t-gdtoie | FileCheck %s --check-prefix=GDTOIE-RELOC +# RUN: llvm-readelf -s %t-gdtoie | FileCheck %s --check-prefix=GDTOIE-SYM +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gdtoie | FileCheck %s --check-prefix=GDTOIE + +# RUN: llvm-readelf -r %t-gdtole | FileCheck %s --check-prefix=GDTOLE-RELOC +# RUN: llvm-readelf -s %t-gdtole | FileCheck %s --check-prefix=GDTOLE-SYM +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gdtole | FileCheck %s --check-prefix=GDTOLE + +## This test checks the General Dynamic PC Relative TLS implementation for lld. +## GD - General Dynamic with no relaxation possible +## GDTOIE - General Dynamic relaxed to Initial Exec +## GDTOLE - General Dynamic relaxed to Local Exec + +#--- lds +SECTIONS { + .text_addr 0x1001000 : { *(.text_addr) } +} + +#--- defs +.section .tbss,"awT",@nobits +.globl x +x: + .long 0 +.globl y +y: + .long 0 + +#--- asm + +# GD-RELOC: Relocation section '.rela.dyn' at offset 0x100b8 contains 4 entries: +# GD-RELOC: 0000000001001160 0000000200000044 R_PPC64_DTPMOD64 0000000000000000 x + 0 +# GD-RELOC: 0000000001001168 000000020000004e R_PPC64_DTPREL64 0000000000000000 x + 0 +# GD-RELOC: 0000000001001170 0000000300000044 R_PPC64_DTPMOD64 0000000000000000 y + 0 +# GD-RELOC: 0000000001001178 000000030000004e R_PPC64_DTPREL64 0000000000000000 y + 0 + +# GD-SYM: Symbol table '.dynsym' contains 4 entries: +# GD-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x +# GD-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT UND y + + +# GDTOIE-RELOC: Relocation section '.rela.dyn' at offset 0x10118 contains 2 entries: +# GDTOIE-RELOC: 00000000010010e0 0000000200000049 R_PPC64_TPREL64 0000000000000000 x + 0 +# GDTOIE-RELOC: 00000000010010e8 0000000300000049 R_PPC64_TPREL64 0000000000000000 y + 0 + +# GDTOIE-SYM: Symbol table '.dynsym' contains 4 entries: +# GDTOIE-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x +# GDTOIE-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT UND y + + +# GDTOLE-RELOC: There are no relocations in this file. + +# GDTOLE-SYM: Symbol table '.symtab' contains 5 entries: +# GDTOLE-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT 3 x +# GDTOLE-SYM: 4: 0000000000000004 0 TLS GLOBAL DEFAULT 3 y + +# GD-LABEL: : +# GD-NEXT: paddi 3, 0, 352, 1 +# GD-NEXT: bl +# GD-NEXT: paddi 3, 0, 356, 1 +# GD-NEXT: bl +# GD-NEXT: blr +# GDTOIE-LABEL: : +# GDTOIE-NEXT: pld 3, 224(0), 1 +# GDTOIE-NEXT: add 3, 3, 13 +# GDTOIE-NEXT: pld 3, 220(0), 1 +# GDTOIE-NEXT: add 3, 3, 13 +# GDTOIE-NEXT: blr +# GDTOLE-LABEL: : +# GDTOLE-NEXT: paddi 3, 13, -28672, 0 +# GDTOLE-NEXT: nop +# GDTOLE-NEXT: paddi 3, 13, -28668, 0 +# GDTOLE-NEXT: nop +# GDTOLE-NEXT: blr +.section .text_addr, "ax", %progbits +GDTwoVal: + paddi 3, 0, x@got@tlsgd@pcrel, 1 + bl __tls_get_addr@notoc(x@tlsgd) + paddi 3, 0, y@got@tlsgd@pcrel, 1 + bl __tls_get_addr@notoc(y@tlsgd) + blr From 5f3e565f59ee8c5614663a484df1dc853ca3694d Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 1 Oct 2020 13:28:35 -0500 Subject: [PATCH 303/544] Revert "[LLD][PowerPC] Add support for R_PPC64_GOT_TLSGD_PCREL34 used in TLS General Dynamic" This reverts commit 79122868f9a3909cfd94d51e9bfe960917a1be05. --- lld/ELF/Arch/PPC64.cpp | 76 ++++--------------------- lld/ELF/Relocations.cpp | 13 ----- lld/test/ELF/ppc64-tls-pcrel-gd.s | 94 ------------------------------- 3 files changed, 12 insertions(+), 171 deletions(-) delete mode 100644 lld/test/ELF/ppc64-tls-pcrel-gd.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 2e7b20d46cb09..06dd863f31b2a 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -727,38 +727,15 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; - case R_PPC64_GOT_TLSGD_PCREL34: - // Relax from paddi r3, 0, x@got@tlsgd@pcrel, 1 to - // paddi r3, r13, x@tprel, 0 - writePrefixedInstruction(loc, 0x06000000386d0000); - relocateNoSym(loc, R_PPC64_TPREL34, val); - break; - case R_PPC64_TLSGD: { - // PC Relative Relaxation: - // Relax from bl __tls_get_addr@notoc(x@tlsgd) to - // nop - // TOC Relaxation: - // Relax from bl __tls_get_addr(x@tlsgd) - // nop - // to - // nop - // addi r3, r3, x@tprel@l - const uintptr_t locAsInt = reinterpret_cast(loc); - if (locAsInt % 4 == 0) { - write32(loc, NOP); // nop - write32(loc + 4, 0x38630000); // addi r3, r3 - // Since we are relocating a half16 type relocation and Loc + 4 points to - // the start of an instruction we need to advance the buffer by an extra - // 2 bytes on BE. - relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), - R_PPC64_TPREL16_LO, val); - } else if (locAsInt % 4 == 1) { - write32(loc - 1, NOP); - } else { - errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment"); - } + case R_PPC64_TLSGD: + write32(loc, NOP); + write32(loc + 4, 0x38630000); // addi r3, r3 + // Since we are relocating a half16 type relocation and Loc + 4 points to + // the start of an instruction we need to advance the buffer by an extra + // 2 bytes on BE. + relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, val); break; - } default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } @@ -970,8 +947,6 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSGD16_LO: return R_TLSGD_GOT; - case R_PPC64_GOT_TLSGD_PCREL34: - return R_TLSGD_PC; case R_PPC64_GOT_TLSLD16: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_GOT_TLSLD16_HI: @@ -1286,7 +1261,6 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { break; case R_PPC64_PCREL34: case R_PPC64_GOT_PCREL34: - case R_PPC64_GOT_TLSGD_PCREL34: case R_PPC64_GOT_TPREL_PCREL34: case R_PPC64_TPREL34: { const uint64_t si0Mask = 0x00000003ffff0000; @@ -1366,8 +1340,7 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000) return R_PPC64_RELAX_GOT_PC; } - - if (type != R_PPC64_GOT_TLSGD_PCREL34 && expr == R_RELAX_TLS_GD_TO_IE) + if (expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) return R_RELAX_TLS_LD_TO_LE_ABS; @@ -1408,35 +1381,10 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } - case R_PPC64_GOT_TLSGD_PCREL34: { - // Relax from paddi r3, 0, sym@got@tlsgd@pcrel, 1 to - // pld r3, sym@got@tprel@pcrel - writePrefixedInstruction(loc, 0x04100000e4600000); - relocateNoSym(loc, R_PPC64_GOT_TPREL_PCREL34, val); - return; - } - case R_PPC64_TLSGD: { - // PC Relative Relaxation: - // Relax from bl __tls_get_addr@notoc(x@tlsgd) to - // nop - // TOC Relaxation: - // Relax from bl __tls_get_addr(x@tlsgd) - // nop - // to - // nop - // add r3, r3, r13 - const uintptr_t locAsInt = reinterpret_cast(loc); - if (locAsInt % 4 == 0) { - write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop - write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 - } else if (locAsInt % 4 == 1) { - // bl __tls_get_addr(sym@tlsgd) --> add r3, r3, r13 - write32(loc - 1, 0x7c636a14); - } else { - errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment"); - } + case R_PPC64_TLSGD: + write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop + write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 return; - } default: llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index ea6aa3c6a12a4..4c6a70d9034e9 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1357,19 +1357,6 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (type == R_PPC64_TOC16_LO && sym.isSection() && isa(sym) && cast(sym).section->name == ".toc") ppc64noTocRelax.insert({&sym, addend}); - - if (type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) { - if (i == end) { - errorOrWarn("R_PPC64_TLSGD may not be the last relocation" + - getLocation(sec, sym, offset)); - return; - } - - // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC case, - // so we can discern it later from the toc-case. - if (i->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC) - ++offset; - } } // Relax relocations. diff --git a/lld/test/ELF/ppc64-tls-pcrel-gd.s b/lld/test/ELF/ppc64-tls-pcrel-gd.s deleted file mode 100644 index 2220d91fe798e..0000000000000 --- a/lld/test/ELF/ppc64-tls-pcrel-gd.s +++ /dev/null @@ -1,94 +0,0 @@ -# REQUIRES: ppc -# RUN: split-file %s %t - -# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/asm -o %t.o -# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t-defs.o -# RUN: ld.lld --shared %t-defs.o -o %t-defs.so -# RUN: ld.lld -T %t/lds --shared %t.o -o %t-gd.so -# RUN: ld.lld -T %t/lds %t.o %t-defs.so -o %t-gdtoie -# RUN: ld.lld -T %t/lds %t.o %t-defs.o -o %t-gdtole - -# RUN: llvm-readelf -r %t-gd.so | FileCheck %s --check-prefix=GD-RELOC -# RUN: llvm-readelf -s %t-gd.so | FileCheck %s --check-prefix=GD-SYM -# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gd.so | FileCheck %s --check-prefix=GD - -# RUN: llvm-readelf -r %t-gdtoie | FileCheck %s --check-prefix=GDTOIE-RELOC -# RUN: llvm-readelf -s %t-gdtoie | FileCheck %s --check-prefix=GDTOIE-SYM -# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gdtoie | FileCheck %s --check-prefix=GDTOIE - -# RUN: llvm-readelf -r %t-gdtole | FileCheck %s --check-prefix=GDTOLE-RELOC -# RUN: llvm-readelf -s %t-gdtole | FileCheck %s --check-prefix=GDTOLE-SYM -# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gdtole | FileCheck %s --check-prefix=GDTOLE - -## This test checks the General Dynamic PC Relative TLS implementation for lld. -## GD - General Dynamic with no relaxation possible -## GDTOIE - General Dynamic relaxed to Initial Exec -## GDTOLE - General Dynamic relaxed to Local Exec - -#--- lds -SECTIONS { - .text_addr 0x1001000 : { *(.text_addr) } -} - -#--- defs -.section .tbss,"awT",@nobits -.globl x -x: - .long 0 -.globl y -y: - .long 0 - -#--- asm - -# GD-RELOC: Relocation section '.rela.dyn' at offset 0x100b8 contains 4 entries: -# GD-RELOC: 0000000001001160 0000000200000044 R_PPC64_DTPMOD64 0000000000000000 x + 0 -# GD-RELOC: 0000000001001168 000000020000004e R_PPC64_DTPREL64 0000000000000000 x + 0 -# GD-RELOC: 0000000001001170 0000000300000044 R_PPC64_DTPMOD64 0000000000000000 y + 0 -# GD-RELOC: 0000000001001178 000000030000004e R_PPC64_DTPREL64 0000000000000000 y + 0 - -# GD-SYM: Symbol table '.dynsym' contains 4 entries: -# GD-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x -# GD-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT UND y - - -# GDTOIE-RELOC: Relocation section '.rela.dyn' at offset 0x10118 contains 2 entries: -# GDTOIE-RELOC: 00000000010010e0 0000000200000049 R_PPC64_TPREL64 0000000000000000 x + 0 -# GDTOIE-RELOC: 00000000010010e8 0000000300000049 R_PPC64_TPREL64 0000000000000000 y + 0 - -# GDTOIE-SYM: Symbol table '.dynsym' contains 4 entries: -# GDTOIE-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x -# GDTOIE-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT UND y - - -# GDTOLE-RELOC: There are no relocations in this file. - -# GDTOLE-SYM: Symbol table '.symtab' contains 5 entries: -# GDTOLE-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT 3 x -# GDTOLE-SYM: 4: 0000000000000004 0 TLS GLOBAL DEFAULT 3 y - -# GD-LABEL: : -# GD-NEXT: paddi 3, 0, 352, 1 -# GD-NEXT: bl -# GD-NEXT: paddi 3, 0, 356, 1 -# GD-NEXT: bl -# GD-NEXT: blr -# GDTOIE-LABEL: : -# GDTOIE-NEXT: pld 3, 224(0), 1 -# GDTOIE-NEXT: add 3, 3, 13 -# GDTOIE-NEXT: pld 3, 220(0), 1 -# GDTOIE-NEXT: add 3, 3, 13 -# GDTOIE-NEXT: blr -# GDTOLE-LABEL: : -# GDTOLE-NEXT: paddi 3, 13, -28672, 0 -# GDTOLE-NEXT: nop -# GDTOLE-NEXT: paddi 3, 13, -28668, 0 -# GDTOLE-NEXT: nop -# GDTOLE-NEXT: blr -.section .text_addr, "ax", %progbits -GDTwoVal: - paddi 3, 0, x@got@tlsgd@pcrel, 1 - bl __tls_get_addr@notoc(x@tlsgd) - paddi 3, 0, y@got@tlsgd@pcrel, 1 - bl __tls_get_addr@notoc(y@tlsgd) - blr From 499260c03b916920d77c5833022937fd0e20d2c0 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 1 Oct 2020 11:27:32 -0700 Subject: [PATCH 304/544] Revert "[CFGuard] Add address-taken IAT tables and delay-load support" This reverts commit ef4e971e5e18ae796466623df8f26265ba6bdfb5. --- lld/COFF/DLL.cpp | 10 -- lld/COFF/ICF.cpp | 2 +- lld/COFF/InputFiles.cpp | 2 - lld/COFF/InputFiles.h | 7 +- lld/COFF/Symbols.h | 7 -- lld/COFF/Writer.cpp | 46 +------ lld/test/COFF/giats.s | 117 ------------------ llvm/include/llvm/MC/MCObjectFileInfo.h | 2 - llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 47 ++----- llvm/lib/MC/MCObjectFileInfo.cpp | 5 - llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll | 22 ---- llvm/tools/llvm-readobj/COFFDumper.cpp | 10 -- 12 files changed, 18 insertions(+), 259 deletions(-) delete mode 100644 lld/test/COFF/giats.s delete mode 100644 llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index e88a6b1bffb06..50301ad91b1d5 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -19,7 +19,6 @@ #include "DLL.h" #include "Chunks.h" -#include "SymbolTable.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" @@ -654,18 +653,9 @@ void DelayLoadContents::create(Defined *h) { auto *c = make(extName, 0); names.push_back(make(c)); hintNames.push_back(c); - // Add a syntentic symbol for this load thunk, using the "__imp_load" - // prefix, in case this thunk needs to be added to the list of valid - // call targets for Control Flow Guard. - StringRef symName = saver.save("__imp_load_" + extName); - s->loadThunkSym = - cast(symtab->addSynthetic(symName, t)); } } thunks.push_back(tm); - StringRef tmName = - saver.save("__tailMerge_" + syms[0]->getDLLName().lower()); - symtab->addSynthetic(tmName, tm); // Terminate with null values. addresses.push_back(make(8)); names.push_back(make(8)); diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index 386f861fb27fb..1b33634b63d6a 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -131,7 +131,7 @@ bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { auto considerForICF = [](const SectionChunk &assoc) { StringRef Name = assoc.getSectionName(); return !(Name.startswith(".debug") || Name == ".gfids$y" || - Name == ".giats$y" || Name == ".gljmp$y"); + Name == ".gljmp$y"); }; auto ra = make_filter_range(a->children(), considerForICF); auto rb = make_filter_range(b->children(), considerForICF); diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 37f66131620e6..aaa00d0f7279a 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -280,8 +280,6 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber, debugChunks.push_back(c); else if (name == ".gfids$y") guardFidChunks.push_back(c); - else if (name == ".giats$y") - guardIATChunks.push_back(c); else if (name == ".gljmp$y") guardLJmpChunks.push_back(c); else if (name == ".sxdata") diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 26a6e5b7b70d9..0a5114b165f0c 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -144,7 +144,6 @@ class ObjFile : public InputFile { ArrayRef getDebugChunks() { return debugChunks; } ArrayRef getSXDataChunks() { return sxDataChunks; } ArrayRef getGuardFidChunks() { return guardFidChunks; } - ArrayRef getGuardIATChunks() { return guardIATChunks; } ArrayRef getGuardLJmpChunks() { return guardLJmpChunks; } ArrayRef getSymbols() { return symbols; } @@ -284,11 +283,9 @@ class ObjFile : public InputFile { // 32-bit x86. std::vector sxDataChunks; - // Chunks containing symbol table indices of address taken symbols, address - // taken IAT entries, and longjmp targets. These are not linked into the - // final binary when /guard:cf is set. + // Chunks containing symbol table indices of address taken symbols and longjmp + // targets. These are not linked into the final binary when /guard:cf is set. std::vector guardFidChunks; - std::vector guardIATChunks; std::vector guardLJmpChunks; // This vector contains a list of all symbols defined or referenced by this diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 370f72745900d..1da4df3669662 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -343,13 +343,6 @@ class DefinedImportData : public Defined { uint16_t getOrdinal() { return file->hdr->OrdinalHint; } ImportFile *file; - - // This is a pointer to the synthetic symbol associated with the load thunk - // for this symbol that will be called if the DLL is delay-loaded. This is - // needed for Control Flow Guard because if this DefinedImportData symbol is a - // valid call target, the corresponding load thunk must also be marked as a - // valid call target. - DefinedSynthetic *loadThunkSym; }; // This class represents a symbol for a jump table entry which jumps diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index b437a681483ff..d1081b008ea40 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -227,9 +227,6 @@ class Writer { void markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols); - void getSymbolsFromSections(ObjFile *file, - ArrayRef symIdxChunks, - std::vector &symbols); void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym); void setSectionPermissions(); @@ -608,9 +605,8 @@ void Writer::run() { createImportTables(); createSections(); - appendImportThunks(); - // Import thunks must be added before the Control Flow Guard tables are added. createMiscChunks(); + appendImportThunks(); createExportTable(); mergeSections(); removeUnusedSections(); @@ -1622,8 +1618,6 @@ static void markSymbolsWithRelocations(ObjFile *file, // table. void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; - SymbolRVASet giatsRVASet; - std::vector giatsSymbols; SymbolRVASet longJmpTargets; for (ObjFile *file : ObjFile::instances) { // If the object was compiled with /guard:cf, the address taken symbols @@ -1633,8 +1627,6 @@ void Writer::createGuardCFTables() { // possibly address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); - markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet); - getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); @@ -1649,16 +1641,6 @@ void Writer::createGuardCFTables() { for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); - // For each entry in the .giats table, check if it has a corresponding load - // thunk (e.g. because the DLL that defines it will be delay-loaded) and, if - // so, add the load thunk to the address taken (.gfids) table. - for (Symbol *s : giatsSymbols) { - if (auto *di = dyn_cast(s)) { - if (di->loadThunkSym) - addSymbolToRVASet(addressTakenSyms, di->loadThunkSym); - } - } - // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) @@ -1667,10 +1649,6 @@ void Writer::createGuardCFTables() { maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); - // Add the Guard Address Taken IAT Entry Table (.giats). - maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table", - "__guard_iat_count"); - // Add the longjmp target table unless the user told us not to. if (config->guardCF == GuardCFLevel::Full) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", @@ -1687,11 +1665,11 @@ void Writer::createGuardCFTables() { } // Take a list of input sections containing symbol table indices and add those -// symbols to a vector. The challenge is that symbol RVAs are not known and +// symbols to an RVA table. The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. -void Writer::getSymbolsFromSections(ObjFile *file, +void Writer::markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, - std::vector &symbols) { + SymbolRVASet &tableSymbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. @@ -1709,7 +1687,7 @@ void Writer::getSymbolsFromSections(ObjFile *file, } // Read each symbol table index and check if that symbol was included in the - // final link. If so, add it to the vector of symbols. + // final link. If so, add it to the table symbol set. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); @@ -1721,24 +1699,12 @@ void Writer::getSymbolsFromSections(ObjFile *file, } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) - symbols.push_back(cast(s)); + addSymbolToRVASet(tableSymbols, cast(s)); } } } } -// Take a list of input sections containing symbol table indices and add those -// symbols to an RVA table. -void Writer::markSymbolsForRVATable(ObjFile *file, - ArrayRef symIdxChunks, - SymbolRVASet &tableSymbols) { - std::vector syms; - getSymbolsFromSections(file, symIdxChunks, syms); - - for (Symbol *s : syms) - addSymbolToRVASet(tableSymbols, cast(s)); -} - // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. diff --git a/lld/test/COFF/giats.s b/lld/test/COFF/giats.s deleted file mode 100644 index f18720f3692fa..0000000000000 --- a/lld/test/COFF/giats.s +++ /dev/null @@ -1,117 +0,0 @@ -# REQUIRES: x86 - -# Make a DLL that exports exportfn1. -# RUN: yaml2obj %p/Inputs/export.yaml -o %basename_t-exp.obj -# RUN: lld-link /out:%basename_t-exp.dll /dll %basename_t-exp.obj /export:exportfn1 /implib:%basename_t-exp.lib - -# Make an object file that imports exportfn1. -# RUN: llvm-mc -triple x86_64-windows-msvc %s -filetype=obj -o %basename_t.obj - -# Check that the Guard address-taken IAT entry tables are propagated to the final executable. -# RUN: lld-link %basename_t.obj -guard:cf -entry:main -out:%basename_t-nodelay.exe %basename_t-exp.lib -# RUN: llvm-readobj --file-headers --coff-load-config %basename_t-nodelay.exe | FileCheck %s --check-prefix CHECK - -# CHECK: ImageBase: 0x140000000 -# CHECK: LoadConfig [ -# CHECK: GuardCFFunctionTable: 0x140002114 -# CHECK: GuardCFFunctionCount: 1 -# CHECK: GuardFlags: 0x10500 -# CHECK: GuardAddressTakenIatEntryTable: 0x140002118 -# CHECK: GuardAddressTakenIatEntryCount: 1 -# CHECK: ] -# CHECK: GuardFidTable [ -# CHECK-NEXT: 0x14000{{.*}} -# CHECK-NEXT: ] -# CHECK: GuardIatTable [ -# CHECK-NEXT: 0x14000{{.*}} -# CHECK-NEXT: ] - - -# Check that the additional load thunk symbol is added to the GFIDs table. -# RUN: lld-link %basename_t.obj -guard:cf -entry:main -out:%basename_t-delay.exe %basename_t-exp.lib -alternatename:__delayLoadHelper2=main -delayload:%basename_t-exp.dll -# RUN: llvm-readobj --file-headers --coff-load-config %basename_t-delay.exe | FileCheck %s --check-prefix DELAY-CHECK - -# DELAY-CHECK: ImageBase: 0x140000000 -# DELAY-CHECK: LoadConfig [ -# DELAY-CHECK: GuardCFFunctionTable: 0x140002114 -# DELAY-CHECK: GuardCFFunctionCount: 2 -# DELAY-CHECK: GuardFlags: 0x10500 -# DELAY-CHECK: GuardAddressTakenIatEntryTable: 0x14000211C -# DELAY-CHECK: GuardAddressTakenIatEntryCount: 1 -# DELAY-CHECK: ] -# DELAY-CHECK: GuardFidTable [ -# DELAY-CHECK-NEXT: 0x14000{{.*}} -# DELAY-CHECK-NEXT: 0x14000{{.*}} -# DELAY-CHECK-NEXT: ] -# DELAY-CHECK: GuardIatTable [ -# DELAY-CHECK-NEXT: 0x14000{{.*}} -# DELAY-CHECK-NEXT: ] - - -# This assembly is reduced from C code like: -# __declspec(noinline) -# void IndirectCall(BOOL (func)(HANDLE)) { -# (*func)(NULL); -# } -# int main(int argc, char** argv) { -# IndirectCall(exportfn1); -# } - - .text - .def @feat.00; - .scl 3; - .type 0; - .endef - .globl @feat.00 -.set @feat.00, 2048 - .def IndirectCall; .scl 2; .type 32; .endef - .globl IndirectCall # -- Begin function IndirectCall - .p2align 4, 0x90 -IndirectCall: # @IndirectCall -# %bb.0: - subq $40, %rsp - movq %rcx, 32(%rsp) - movq 32(%rsp), %rax - movq %rax, %rdx # This would otherwise have be: movq __guard_dispatch_icall_fptr(%rip), %rdx - xorl %ecx, %ecx - callq *%rdx - nop - addq $40, %rsp - retq - # -- End function - .def main; .scl 2; .type 32; .endef - .globl main # -- Begin function main - .p2align 4, 0x90 -main: # @main -# %bb.0: - subq $56, %rsp - movq __imp_exportfn1(%rip), %rax - movq %rdx, 48(%rsp) - movl %ecx, 44(%rsp) - movq %rax, %rcx - callq IndirectCall - xorl %eax, %eax - addq $56, %rsp - retq - # -- End function - .section .gfids$y,"dr" - .section .giats$y,"dr" - .symidx __imp_exportfn1 - .section .gljmp$y,"dr" - -# Load configuration directory entry (winnt.h _IMAGE_LOAD_CONFIG_DIRECTORY64). -# The linker will define the __guard_* symbols. - .section .rdata,"dr" -.globl _load_config_used -_load_config_used: - .long 256 - .fill 124, 1, 0 - .quad __guard_fids_table - .quad __guard_fids_count - .long __guard_flags - .fill 12, 1, 0 - .quad __guard_iat_table - .quad __guard_iat_count - .quad __guard_longjmp_table - .quad __guard_fids_count - .fill 84, 1, 0 \ No newline at end of file diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 316086833d975..8c6bcba2332b1 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -215,7 +215,6 @@ class MCObjectFileInfo { MCSection *XDataSection = nullptr; MCSection *SXDataSection = nullptr; MCSection *GFIDsSection = nullptr; - MCSection *GIATsSection = nullptr; MCSection *GLJMPSection = nullptr; // XCOFF specific sections @@ -399,7 +398,6 @@ class MCObjectFileInfo { MCSection *getXDataSection() const { return XDataSection; } MCSection *getSXDataSection() const { return SXDataSection; } MCSection *getGFIDsSection() const { return GFIDsSection; } - MCSection *getGIATsSection() const { return GIATsSection; } MCSection *getGLJMPSection() const { return GLJMPSection; } // XCOFF specific sections diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 09bcf5cb25a21..914308d9147e2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file contains support for writing the metadata for Windows Control Flow -// Guard, including address-taken functions and valid longjmp targets. +// Guard, including address-taken functions, and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Instructions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -78,49 +78,20 @@ static bool isPossibleIndirectCallTarget(const Function *F) { return false; } -/// Returns true if this function should be added to the Guard Address Taken IAT -/// Entry Table (GIATs) instead of the Guard Function ID Table (GFIDs). -static bool isIATAddressTaken(const Function *F) { - if (F->hasDLLImportStorageClass()) { - return true; - } - return false; -} - void WinCFGuard::endModule() { const Module *M = Asm->MMI->getModule(); - std::vector GFIDsEntries; - std::vector GIATsEntries; - for (const Function &F : *M) { - if (isPossibleIndirectCallTarget(&F)) { - if (isIATAddressTaken(&F)) { - // If the possible call target is reached via the IAT, add it to the - // GIATs table instead of the GFIDs table. - GIATsEntries.push_back(&F); - } else { - // Otherwise add it to the GFIDs table. - GFIDsEntries.push_back(&F); - } - } - } - - if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty()) + std::vector Functions; + for (const Function &F : *M) + if (isPossibleIndirectCallTarget(&F)) + Functions.push_back(&F); + if (Functions.empty() && LongjmpTargets.empty()) return; - - // Emit the symbol index of each GFIDs entry to form the GFIDs table. auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); - for (const Function *F : GFIDsEntries) + for (const Function *F : Functions) OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); - // Emit the symbol index of each GIATs entry to form the GIATs table. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); - for (const Function *F : GIATsEntries) { - OS.EmitCOFFSymbolIndex(Asm->OutContext.getOrCreateSymbol( - Twine("__imp_") + Asm->getSymbol(F)->getName())); - } - - // Emit the symbol index of each longjmp target to form the GLJMP table. + // Emit the symbol index of each longjmp target. OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { OS.EmitCOFFSymbolIndex(S); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index eec2615974b57..ae7345c4e05b9 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -752,11 +752,6 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); - GIATsSection = Ctx->getCOFFSection(".giats$y", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); - GLJMPSection = Ctx->getCOFFSection(".gljmp$y", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, diff --git a/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll b/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll deleted file mode 100644 index 0ac436cc6add5..0000000000000 --- a/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s -; Control Flow Guard is currently only available on Windows - -declare dllimport i32 @target_func() - -; Test address-taken functions from imported DLLs are added to the -; Guard Address-Taken IAT Entry table (.giats). -define i32 @func_cf_giats() { -entry: - %func_ptr = alloca i32 ()*, align 8 - store i32 ()* @target_func, i32 ()** %func_ptr, align 8 - %0 = load i32 ()*, i32 ()** %func_ptr, align 8 - %1 = call i32 %0() - ret i32 %1 -} - -!llvm.module.flags = !{!0} -!0 = !{i32 2, !"cfguard", i32 2} - -; CHECK-LABEL: .section .giats$y,"dr" -; CHECK-NEXT: .symidx __imp_target_func -; CHECK-NOT: .symidx \ No newline at end of file diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index b4fb2e52cb199..22e27b3e5a29e 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -67,8 +67,6 @@ struct LoadConfigTables { uint32_t GuardFlags = 0; uint64_t GuardFidTableVA = 0; uint64_t GuardFidTableCount = 0; - uint64_t GuardIatTableVA = 0; - uint64_t GuardIatTableCount = 0; uint64_t GuardLJmpTableVA = 0; uint64_t GuardLJmpTableCount = 0; }; @@ -806,11 +804,6 @@ void COFFDumper::printCOFFLoadConfig() { } } - if (Tables.GuardIatTableVA) { - ListScope LS(W, "GuardIatTable"); - printRVATable(Tables.GuardIatTableVA, Tables.GuardIatTableCount, 4); - } - if (Tables.GuardLJmpTableVA) { ListScope LS(W, "GuardLJmpTable"); printRVATable(Tables.GuardLJmpTableVA, Tables.GuardLJmpTableCount, 4); @@ -895,9 +888,6 @@ void COFFDumper::printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables) { Conf->GuardRFVerifyStackPointerFunctionPointer); W.printHex("HotPatchTableOffset", Conf->HotPatchTableOffset); - Tables.GuardIatTableVA = Conf->GuardAddressTakenIatEntryTable; - Tables.GuardIatTableCount = Conf->GuardAddressTakenIatEntryCount; - Tables.GuardLJmpTableVA = Conf->GuardLongJumpTargetTable; Tables.GuardLJmpTableCount = Conf->GuardLongJumpTargetCount; } From 149f5b573c79eac0c519ada4d2f7c50e17796cdf Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 1 Oct 2020 14:23:18 -0400 Subject: [PATCH 305/544] [APFloat] convert SNaN to QNaN in convert() and raise Invalid signal This is an alternate fix (see D87835) for a bug where a NaN constant gets wrongly transformed into Infinity via truncation. In this patch, we uniformly convert any SNaN to QNaN while raising 'invalid op'. But we don't have a way to directly specify a 32-bit SNaN value in LLVM IR, so those are always encoded/decoded by calling convert from/to 64-bit hex. See D88664 for a clang fix needed to allow this change. Differential Revision: https://reviews.llvm.org/D88238 --- clang/test/CodeGen/builtin-nan-exception.c | 6 ++++- clang/test/CodeGen/builtin-nan-legacy.c | 10 ++++++- clang/test/CodeGen/mips-unsupported-nan.c | 16 ++++++++++- llvm/lib/AsmParser/LLParser.cpp | 10 +++++++ llvm/lib/IR/AsmWriter.cpp | 14 ++++++++-- llvm/lib/Support/APFloat.cpp | 27 ++++++------------- .../Transforms/InstSimplify/ConstProp/cast.ll | 8 +++--- .../Transforms/PhaseOrdering/X86/nancvt.ll | 15 ++++++----- llvm/unittests/ADT/APFloatTest.cpp | 14 +++++----- 9 files changed, 80 insertions(+), 40 deletions(-) diff --git a/clang/test/CodeGen/builtin-nan-exception.c b/clang/test/CodeGen/builtin-nan-exception.c index a0de25e52ebe6..7445411ddf89e 100644 --- a/clang/test/CodeGen/builtin-nan-exception.c +++ b/clang/test/CodeGen/builtin-nan-exception.c @@ -17,8 +17,12 @@ float f[] = { // Doubles are created and converted to floats. +// Converting (truncating) to float quiets the NaN (sets the MSB +// of the significand) and raises the APFloat invalidOp exception +// but that should not cause a compilation error in the default +// (ignore FP exceptions) mode. -// CHECK: float 0x7FF8000000000000, float 0x7FF4000000000000 +// CHECK: float 0x7FF8000000000000, float 0x7FFC000000000000 float converted_to_float[] = { __builtin_nan(""), diff --git a/clang/test/CodeGen/builtin-nan-legacy.c b/clang/test/CodeGen/builtin-nan-legacy.c index cd0f0fd14f14c..de6c15379a4dd 100644 --- a/clang/test/CodeGen/builtin-nan-legacy.c +++ b/clang/test/CodeGen/builtin-nan-legacy.c @@ -1,7 +1,15 @@ // RUN: %clang -target mipsel-unknown-linux -mnan=legacy -emit-llvm -S %s -o - | FileCheck %s -// CHECK: float 0x7FF4000000000000, float 0x7FF8000000000000 +// CHECK: float 0x7FFC000000000000, float 0x7FF8000000000000 // CHECK: double 0x7FF4000000000000, double 0x7FF8000000000000 +// The first line shows an unintended consequence. +// __builtin_nan() creates a legacy QNAN double with an empty payload +// (the first bit of the significand is clear to indicate quiet, so +// the second bit of the payload is set to maintain NAN-ness). +// The value is then truncated, but llvm::APFloat does not know about +// the inverted quiet bit, so it sets the first bit on conversion +// to indicate 'quiet' independently of the setting in clang. + float f[] = { __builtin_nan(""), __builtin_nans(""), diff --git a/clang/test/CodeGen/mips-unsupported-nan.c b/clang/test/CodeGen/mips-unsupported-nan.c index 2fd5042e92f8e..16cea3c2e7e18 100644 --- a/clang/test/CodeGen/mips-unsupported-nan.c +++ b/clang/test/CodeGen/mips-unsupported-nan.c @@ -39,7 +39,21 @@ // CHECK-MIPS64: warning: ignoring '-mnan=2008' option because the 'mips64' architecture does not support it // CHECK-MIPS64R6: warning: ignoring '-mnan=legacy' option because the 'mips64r6' architecture does not support it -// CHECK-NANLEGACY: float 0x7FF4000000000000 +// This call creates a QNAN double with an empty payload. +// The quiet bit is inverted in legacy mode: it is clear to indicate QNAN, +// so the next highest bit is set to maintain NAN (not infinity). +// In regular (2008) mode, the quiet bit is set to indicate QNAN. + +// CHECK-NANLEGACY: double 0x7FF4000000000000 +// CHECK-NAN2008: double 0x7FF8000000000000 + +double d = __builtin_nan(""); + +// This call creates a QNAN double with an empty payload and then truncates. +// llvm::APFloat does not know about the inverted quiet bit, so it sets the +// quiet bit on conversion independently of the setting in clang. + +// CHECK-NANLEGACY: float 0x7FFC000000000000 // CHECK-NAN2008: float 0x7FF8000000000000 float f = __builtin_nan(""); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 63f8531dbdced..4e1ae4faa4e19 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -5345,6 +5345,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, // The lexer has no type info, so builds all half, bfloat, float, and double // FP constants as double. Fix this here. Long double does not need this. if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble()) { + // Check for signaling before potentially converting and losing that info. + bool IsSNAN = ID.APFloatVal.isSignaling(); bool Ignored; if (Ty->isHalfTy()) ID.APFloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, @@ -5355,6 +5357,14 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, else if (Ty->isFloatTy()) ID.APFloatVal.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Ignored); + if (IsSNAN) { + // The convert call above may quiet an SNaN, so manufacture another + // SNaN. The bitcast works because the payload (significand) parameter + // is truncated to fit. + APInt Payload = ID.APFloatVal.bitcastToAPInt(); + ID.APFloatVal = APFloat::getSNaN(ID.APFloatVal.getSemantics(), + ID.APFloatVal.isNegative(), &Payload); + } } V = ConstantFP::get(Context, ID.APFloatVal); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 8cb1883da68e4..550aa1395bef3 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1373,9 +1373,19 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, "assuming that double is 64 bits!"); APFloat apf = APF; // Floats are represented in ASCII IR as double, convert. - if (!isDouble) + // FIXME: We should allow 32-bit hex float and remove this. + if (!isDouble) { + // A signaling NaN is quieted on conversion, so we need to recreate the + // expected value after convert (quiet bit of the payload is clear). + bool IsSNAN = apf.isSignaling(); apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &ignored); + &ignored); + if (IsSNAN) { + APInt Payload = apf.bitcastToAPInt(); + apf = APFloat::getSNaN(APFloat::IEEEdouble(), apf.isNegative(), + &Payload); + } + } Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true); return; } diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 58e49b5384cd5..c79fc8a63de19 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -2243,26 +2243,15 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, if (!X86SpecialNan && semantics == &semX87DoubleExtended) APInt::tcSetBit(significandParts(), semantics->precision - 1); - // If we are truncating NaN, it is possible that we shifted out all of the - // set bits in a signalling NaN payload. But NaN must remain NaN, so some - // bit in the significand must be set (otherwise it is Inf). - // This can only happen with sNaN. Set the 1st bit after the quiet bit, - // so that we still have an sNaN. - // FIXME: Set quiet and return opInvalidOp (on convert of any sNaN). - // But this requires fixing LLVM to parse 32-bit hex FP or ignoring - // conversions while parsing IR. - if (APInt::tcIsZero(significandParts(), newPartCount)) { - assert(shift < 0 && "Should not lose NaN payload on extend"); - assert(semantics->precision >= 3 && "Unexpectedly narrow significand"); - assert(*losesInfo && "Missing payload should have set lost info"); - APInt::tcSetBit(significandParts(), semantics->precision - 3); + // Convert of sNaN creates qNaN and raises an exception (invalid op). + // This also guarantees that a sNaN does not become Inf on a truncation + // that loses all payload bits. + if (isSignaling()) { + makeQuiet(); + fs = opInvalidOp; + } else { + fs = opOK; } - - // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) - // does not give you back the same bits. This is dubious, and we - // don't currently do it. You're really supposed to get - // an invalid operation signal at runtime, but nobody does that. - fs = opOK; } else { *losesInfo = false; fs = opOK; diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll index 41765be1f2c89..adf5e4b68a1b2 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll @@ -40,24 +40,24 @@ define float @overflow_sitofp() { } ; https://llvm.org/PR43907 - make sure that NaN doesn't morph into Inf. -; SNaN remains SNaN. +; SNaN becomes QNaN. define float @nan_f64_trunc() { ; CHECK-LABEL: @nan_f64_trunc( -; CHECK-NEXT: ret float 0x7FF4000000000000 +; CHECK-NEXT: ret float 0x7FF8000000000000 ; %f = fptrunc double 0x7FF0000000000001 to float ret float %f } ; Verify again with a vector and different destination type. -; SNaN remains SNaN (first two elements). +; SNaN becomes SNaN (first two elements). ; QNaN remains QNaN (third element). ; Lower 42 bits of NaN source payload are lost. define <3 x half> @nan_v3f64_trunc() { ; CHECK-LABEL: @nan_v3f64_trunc( -; CHECK-NEXT: ret <3 x half> +; CHECK-NEXT: ret <3 x half> ; %f = fptrunc <3 x double> to <3 x half> ret <3 x half> %f diff --git a/llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll b/llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll index c87390c268c91..1ea183b46a4bf 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll @@ -18,6 +18,9 @@ target triple = "i686-apple-darwin8" @var = external global i32 +; SNAN becomes QNAN on fptrunc: +; 2147228864 = 0x7ffc1cc0 : QNAN + define i32 @main() { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: @@ -30,15 +33,15 @@ define i32 @main() { ; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 -; CHECK-NEXT: store volatile i32 2146502828, i32* @var, align 4 +; CHECK-NEXT: store volatile i32 2147027116, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 -1610612736, i32* @var, align 4 -; CHECK-NEXT: store volatile i32 2146502828, i32* @var, align 4 +; CHECK-NEXT: store volatile i32 2147027116, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 -2147483648, i32* @var, align 4 -; CHECK-NEXT: store volatile i32 2146502828, i32* @var, align 4 +; CHECK-NEXT: store volatile i32 2147027116, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 -1073741824, i32* @var, align 4 -; CHECK-NEXT: store volatile i32 2143034560, i32* @var, align 4 -; CHECK-NEXT: store volatile i32 2143034560, i32* @var, align 4 -; CHECK-NEXT: store volatile i32 2143034560, i32* @var, align 4 +; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 +; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 +; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 475ad83e2d9d1..2088df0b4d3f2 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -1816,11 +1816,12 @@ TEST(APFloatTest, convert) { EXPECT_FALSE(losesInfo); test = APFloat::getSNaN(APFloat::IEEEsingle()); - APFloat X87SNaN = APFloat::getSNaN(APFloat::x87DoubleExtended()); APFloat::opStatus status = test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &losesInfo); - EXPECT_TRUE(test.bitwiseIsEqual(X87SNaN)); + // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set. + APInt topTwoBits(64, 0x6000000000000000); + EXPECT_TRUE(test.bitwiseIsEqual(APFloat::getQNaN(APFloat::x87DoubleExtended(), false, &topTwoBits))); EXPECT_FALSE(losesInfo); - EXPECT_EQ(status, APFloat::opOK); + EXPECT_EQ(status, APFloat::opInvalidOp); test = APFloat::getQNaN(APFloat::IEEEsingle()); APFloat X87QNaN = APFloat::getQNaN(APFloat::x87DoubleExtended()); @@ -1832,6 +1833,7 @@ TEST(APFloatTest, convert) { test = APFloat::getSNaN(APFloat::x87DoubleExtended()); test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &losesInfo); + APFloat X87SNaN = APFloat::getSNaN(APFloat::x87DoubleExtended()); EXPECT_TRUE(test.bitwiseIsEqual(X87SNaN)); EXPECT_FALSE(losesInfo); @@ -1841,13 +1843,13 @@ TEST(APFloatTest, convert) { EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN)); EXPECT_FALSE(losesInfo); - // The payload is lost in truncation, but we must retain NaN, so we set the bit after the quiet bit. + // The payload is lost in truncation, but we retain NaN by setting the quiet bit. APInt payload(52, 1); test = APFloat::getSNaN(APFloat::IEEEdouble(), false, &payload); status = test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); - EXPECT_EQ(0x7fa00000, test.bitcastToAPInt()); + EXPECT_EQ(0x7fc00000, test.bitcastToAPInt()); EXPECT_TRUE(losesInfo); - EXPECT_EQ(status, APFloat::opOK); + EXPECT_EQ(status, APFloat::opInvalidOp); // The payload is lost in truncation. QNaN remains QNaN. test = APFloat::getQNaN(APFloat::IEEEdouble(), false, &payload); From 8d26760a95bae34aa5c1161a1c2ab8c1cdaa10a1 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Mon, 28 Sep 2020 16:12:48 -0700 Subject: [PATCH 306/544] [CMake] Use -isystem flag to access libc++ headers This is a partial revert of D62155. Rather than copying libc++ headers into the build directory to be later overwritten by the final headers, use -isystem flag to access libc++ headers during CMake checks. This should address the occasional flake we've seen, especially on Windows builders where CMake fails to overwrite __config with the final version. Differential Revision: https://reviews.llvm.org/D88454 --- libcxx/include/CMakeLists.txt | 34 +++++++++++++--------------------- llvm/runtimes/CMakeLists.txt | 28 +++++++++++++--------------- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index be8141c981667..7c97db41bb73a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -202,14 +202,6 @@ add_custom_command(OUTPUT ${LIBCXX_BINARY_DIR}/__generated_config add_custom_target(cxx-generated-config ALL DEPENDS ${LIBCXX_BINARY_DIR}/__generated_config) -# In some build configurations (like bootstrapping clang), we need to be able to -# install the libcxx headers before the CMake configuration for libcxx runs. Making -# the name of this target configurable allows LLVM/runtimes/CMakeLists.txt to -# add this subdirectory to the LLVM build to put libcxx's headers in place -# before libcxx's build configuration is run. -if (NOT CXX_HEADER_TARGET) - set(CXX_HEADER_TARGET cxx-headers) -endif() if(LIBCXX_HEADER_DIR) set(output_dir ${LIBCXX_HEADER_DIR}/include/c++/v1) @@ -234,23 +226,23 @@ if(LIBCXX_HEADER_DIR) list(APPEND out_files ${dst}) add_custom_target(generate-cxx-headers DEPENDS ${out_files}) - add_library(${CXX_HEADER_TARGET} INTERFACE) - add_dependencies(${CXX_HEADER_TARGET} generate-cxx-headers ${LIBCXX_CXX_ABI_HEADER_TARGET}) + add_library(cxx-headers INTERFACE) + add_dependencies(cxx-headers generate-cxx-headers ${LIBCXX_CXX_ABI_HEADER_TARGET}) # TODO: Use target_include_directories once we figure out why that breaks the runtimes build if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(${CXX_HEADER_TARGET} INTERFACE /I "${output_dir}") + target_compile_options(cxx-headers INTERFACE /I "${output_dir}") else() - target_compile_options(${CXX_HEADER_TARGET} INTERFACE -I "${output_dir}") + target_compile_options(cxx-headers INTERFACE -I "${output_dir}") endif() # Make sure the generated __config_site header is included when we build the library. if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC") - target_compile_options(${CXX_HEADER_TARGET} INTERFACE /FI "${LIBCXX_BINARY_DIR}/__config_site") + target_compile_options(cxx-headers INTERFACE /FI "${LIBCXX_BINARY_DIR}/__config_site") else() - target_compile_options(${CXX_HEADER_TARGET} INTERFACE -include "${LIBCXX_BINARY_DIR}/__config_site") + target_compile_options(cxx-headers INTERFACE -include "${LIBCXX_BINARY_DIR}/__config_site") endif() else() - add_library(${CXX_HEADER_TARGET} INTERFACE) + add_library(cxx-headers INTERFACE) endif() if (LIBCXX_INSTALL_HEADERS) @@ -258,7 +250,7 @@ if (LIBCXX_INSTALL_HEADERS) get_filename_component(dir ${file} DIRECTORY) install(FILES ${file} DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1/${dir} - COMPONENT ${CXX_HEADER_TARGET} + COMPONENT cxx-headers PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ ) endforeach() @@ -268,15 +260,15 @@ if (LIBCXX_INSTALL_HEADERS) DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1 PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ RENAME __config - COMPONENT ${CXX_HEADER_TARGET}) + COMPONENT cxx-headers) if (NOT CMAKE_CONFIGURATION_TYPES) - add_custom_target(install-${CXX_HEADER_TARGET} - DEPENDS ${CXX_HEADER_TARGET} cxx-generated-config + add_custom_target(install-cxx-headers + DEPENDS cxx-headers cxx-generated-config COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${CXX_HEADER_TARGET} + -DCMAKE_INSTALL_COMPONENT=cxx-headers -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") # Stripping is a no-op for headers - add_custom_target(install-${CXX_HEADER_TARGET}-stripped DEPENDS install-${CXX_HEADER_TARGET}) + add_custom_target(install-cxx-headers-stripped DEPENDS install-cxx-headers) endif() endif() diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 73470074ac6cf..ecf8ac45c9e7b 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -98,9 +98,17 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) include(CheckLibraryExists) include(CheckCCompilerFlag) + include(CMakePushCheckState) - # We don't have libc++ (yet). - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++") + cmake_push_check_state() + + # We don't have libc++ (yet)... + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdinc++ -nostdlib++") + + # ...but we need access to libc++ headers for CMake checks to succeed. + if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR AND "libcxx" IN_LIST LLVM_ENABLE_RUNTIMES) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -isystem ${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}/include") + endif() # Avoid checking whether the compiler is working. set(LLVM_COMPILER_CHECKED ON) @@ -110,8 +118,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) include(HandleLLVMOptions) include(FindPythonInterp) - # Remove the -nostdlib++ option we've added earlier. - string(REPLACE "-nostdlib++" "" CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + cmake_pop_check_state() # Use libtool instead of ar if you are both on an Apple host, and targeting Apple. if(CMAKE_HOST_APPLE AND APPLE) @@ -215,15 +222,6 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) else() # if this is included from LLVM's CMake include(LLVMExternalProjectUtils) - if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR AND "libcxx" IN_LIST LLVM_ENABLE_RUNTIMES) - # This looks wrong, but libcxx's build actually wants the header dir to be - # the root build dir, not the include directory. - set(LIBCXX_BINARY_DIR ${LLVM_BINARY_DIR}) - set(LIBCXX_SOURCE_DIR ${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}) - set(LIBCXX_HEADER_DIR ${LLVM_BINARY_DIR}) - set(CXX_HEADER_TARGET runtime-libcxx-headers) - add_subdirectory(${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}/include ${CXX_HEADER_TARGET}) - endif() if(NOT LLVM_BUILD_RUNTIMES) set(EXTRA_ARGS EXCLUDE_FROM_ALL) @@ -414,7 +412,7 @@ ${error} Set RUNTIMES_BUILD_ALLOW_DARWIN to allow a single darwin triple.") llvm_ExternalProject_Add(runtimes ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${ARG_DEPENDS} ${CXX_HEADER_TARGET} + DEPENDS ${ARG_DEPENDS} # Builtins were built separately above CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} @@ -520,7 +518,7 @@ ${error} Set RUNTIMES_BUILD_ALLOW_DARWIN to allow a single darwin triple.") llvm_ExternalProject_Add(runtimes-${name} ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${${name}_deps} ${CXX_HEADER_TARGET} + DEPENDS ${${name}_deps} # Builtins were built separately above CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} From 9d1c8c0ba94a273c53829f0800335045e547db88 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 1 Oct 2020 20:57:09 +0200 Subject: [PATCH 307/544] [InstCombine] Fix select operand simplification with undef (PR47696) When replacing X == Y ? f(X) : Z with X == Y ? f(Y) : Z, make sure that Y cannot be undef. If it may be undef, we might end up picking a different value for undef in the comparison and the select operand. --- .../InstCombine/InstCombineInternal.h | 1 + .../InstCombine/InstCombineSelect.cpp | 33 +++++++++-------- .../InstCombine/select-binop-cmp.ll | 4 +- llvm/test/Transforms/InstCombine/select.ll | 37 ++++++++++++++++++- 4 files changed, 56 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 62ee7d00780ef..eef56c8645f83 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -711,6 +711,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C); Instruction *foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); + Instruction *foldSelectValueEquivalence(SelectInst &SI, ICmpInst &ICI); Instruction *OptAndOp(BinaryOperator *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ce473410f4caf..087586ede8088 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1165,9 +1165,8 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// /// We can't replace %sel with %add unless we strip away the flags. /// TODO: Wrapping flags could be preserved in some cases with better analysis. -static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, - const SimplifyQuery &Q, - InstCombiner &IC) { +Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, + ICmpInst &Cmp) { if (!Cmp.isEquality()) return nullptr; @@ -1179,18 +1178,20 @@ static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, Swapped = true; } - // In X == Y ? f(X) : Z, try to evaluate f(X) and replace the operand. - // Take care to avoid replacing X == Y ? X : Z with X == Y ? Y : Z, as that - // would lead to an infinite replacement cycle. + // In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand. + // Make sure Y cannot be undef though, as we might pick different values for + // undef in the icmp and in f(Y). Additionally, take care to avoid replacing + // X == Y ? X : Z with X == Y ? Y : Z, as that would lead to an infinite + // replacement cycle. Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); - if (TrueVal != CmpLHS) - if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, + if (TrueVal != CmpLHS && isGuaranteedNotToBeUndefOrPoison(CmpRHS, &Sel, &DT)) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ, /* AllowRefinement */ true)) - return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); - if (TrueVal != CmpRHS) - if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, + return replaceOperand(Sel, Swapped ? 2 : 1, V); + if (TrueVal != CmpRHS && isGuaranteedNotToBeUndefOrPoison(CmpLHS, &Sel, &DT)) + if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ, /* AllowRefinement */ true)) - return IC.replaceOperand(Sel, Swapped ? 2 : 1, V); + return replaceOperand(Sel, Swapped ? 2 : 1, V); auto *FalseInst = dyn_cast(FalseVal); if (!FalseInst) @@ -1215,11 +1216,11 @@ static Instruction *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, // We have an 'EQ' comparison, so the select's false value will propagate. // Example: // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ, /* AllowRefinement */ false) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ, /* AllowRefinement */ false) == TrueVal) { - return IC.replaceInstUsesWith(Sel, FalseVal); + return replaceInstUsesWith(Sel, FalseVal); } // Restore poison-generating flags if the transform did not apply. @@ -1455,7 +1456,7 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI, SQ, *this)) + if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) return NewSel; if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index aa450f8af8b7e..c4a9d0941b967 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -564,10 +564,12 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) ret <2 x i8> %C } +; Folding this would only be legal if we sanitized undef to 0. define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_undef( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; %A = icmp eq <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index b7c4cb5c6420b..df506477eed1c 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2641,10 +2641,24 @@ define i8 @select_replacement_add_nuw(i8 %x, i8 %y) { ret i8 %sel } +define i8 @select_replacement_sub_noundef(i8 %x, i8 noundef %y, i8 %z) { +; CHECK-LABEL: @select_replacement_sub_noundef( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %cmp = icmp eq i8 %x, %y + %sub = sub i8 %x, %y + %sel = select i1 %cmp, i8 %sub, i8 %z + ret i8 %sel +} + +; TODO: The transform is also safe without noundef. define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_sub( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 0, i8 [[Z:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SUB]], i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, %y @@ -2653,11 +2667,29 @@ define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) { ret i8 %sel } +define i8 @select_replacement_shift_noundef(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @select_replacement_shift_noundef( +; CHECK-NEXT: [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1 +; CHECK-NEXT: call void @use_i8(i8 noundef [[SHR]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %shr = lshr exact i8 %x, 1 + call void @use_i8(i8 noundef %shr) + %cmp = icmp eq i8 %shr, %y + %shl = shl i8 %y, 1 + %sel = select i1 %cmp, i8 %shl, i8 %z + ret i8 %sel +} + +; TODO: The transform is also safe without noundef. define i8 @select_replacement_shift(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_shift( ; CHECK-NEXT: [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[Y]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SHL]], i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %shr = lshr exact i8 %x, 1 @@ -2694,4 +2726,5 @@ define i32 @select_replacement_loop2(i32 %arg, i32 %arg2) { } declare void @use(i1) +declare void @use_i8(i8) declare i32 @llvm.cttz.i32(i32, i1 immarg) From 5d46d7e8b288a52de1eff97d6c5b44039ede6661 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 30 Sep 2020 14:40:53 -0700 Subject: [PATCH 308/544] [PDB] Use one func id DenseMap instead of per-source maps, NFC This avoids some DenseMap copies when /Zi is in use, and results in fewer data structures. Differential Revision: https://reviews.llvm.org/D88617 --- lld/COFF/DebugTypes.cpp | 53 ++++++++++++++++++++++------------------- lld/COFF/DebugTypes.h | 4 ++-- lld/COFF/PDB.cpp | 4 ++-- lld/COFF/TypeMerger.h | 4 ++++ 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 557bdd9c04b3d..4ce031118c82f 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -525,9 +525,6 @@ Error UsePrecompSource::mergeInPrecompHeaderObj() { precompSrc->tpiMap.begin() + precompDependency.getTypesCount()); - if (config->debugGHashes) - funcIdToType = precompSrc->funcIdToType; // FIXME: Save copy - return Error::success(); } @@ -612,7 +609,7 @@ void TpiSource::fillIsItemIndexFromDebugT() { }); } -void TpiSource::mergeTypeRecord(CVType ty) { +void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) { // Decide if the merged type goes into TPI or IPI. bool isItem = isIdRecord(ty.kind()); MergedInfo &merged = isItem ? mergedIpi : mergedTpi; @@ -637,6 +634,25 @@ void TpiSource::mergeTypeRecord(CVType ty) { uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec))); merged.recSizes.push_back(static_cast(newSize)); merged.recHashes.push_back(pdbHash); + + // Retain a mapping from PDB function id to PDB function type. This mapping is + // used during symbol procesing to rewrite S_GPROC32_ID symbols to S_GPROC32 + // symbols. + if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { + bool success = ty.length() >= 12; + TypeIndex funcId = curIndex; + if (success) + success &= remapTypeIndex(funcId, TiRefKind::IndexRef); + TypeIndex funcType = + *reinterpret_cast(&newRec.data()[8]); + if (success) { + funcIdToType.push_back({funcId, funcType}); + } else { + StringRef fname = file ? file->getName() : ""; + warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) + + " in " + fname); + } + } } void TpiSource::mergeUniqueTypeRecords(ArrayRef typeRecords, @@ -655,27 +671,9 @@ void TpiSource::mergeUniqueTypeRecords(ArrayRef typeRecords, forEachTypeChecked(typeRecords, [&](const CVType &ty) { if (nextUniqueIndex != uniqueTypes.end() && *nextUniqueIndex == ghashIndex) { - mergeTypeRecord(ty); + mergeTypeRecord(beginIndex + ghashIndex, ty); ++nextUniqueIndex; } - if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) { - bool success = ty.length() >= 12; - TypeIndex srcFuncIdIndex = beginIndex + ghashIndex; - TypeIndex funcId = srcFuncIdIndex; - TypeIndex funcType; - if (success) { - funcType = *reinterpret_cast(&ty.data()[8]); - success &= remapTypeIndex(funcId, TiRefKind::IndexRef); - success &= remapTypeIndex(funcType, TiRefKind::TypeRef); - } - if (success) { - funcIdToType.insert({funcId, funcType}); - } else { - StringRef fname = file ? file->getName() : ""; - warn("corrupt LF_[M]FUNC_ID record 0x" + - utohexstr(srcFuncIdIndex.getIndex()) + " in " + fname); - } - } ++ghashIndex; }); assert(nextUniqueIndex == uniqueTypes.end() && @@ -758,7 +756,6 @@ void TypeServerSource::remapTpiWithGHashes(GHashState *g) { ipiSrc->tpiMap = tpiMap; ipiSrc->ipiMap = ipiMap; ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray())); - funcIdToType = ipiSrc->funcIdToType; // FIXME: Save copy } } @@ -775,7 +772,6 @@ void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { TypeServerSource *tsSrc = *maybeTsSrc; tpiMap = tsSrc->tpiMap; ipiMap = tsSrc->ipiMap; - funcIdToType = tsSrc->funcIdToType; // FIXME: Save copy } void PrecompSource::loadGHashes() { @@ -1102,6 +1098,13 @@ void TypeMerger::mergeTypesWithGHash() { source->remapTpiWithGHashes(&ghashState); }); + // Build a global map of from function ID to function type. + for (TpiSource *source : TpiSource::instances) { + for (auto idToType : source->funcIdToType) + funcIdToType.insert(idToType); + source->funcIdToType.clear(); + } + TpiSource::clearGHashes(); } diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h index 17368244e5898..ebb3b2bac6930 100644 --- a/lld/COFF/DebugTypes.h +++ b/lld/COFF/DebugTypes.h @@ -72,7 +72,7 @@ class TpiSource { void remapRecord(MutableArrayRef rec, ArrayRef typeRefs); - void mergeTypeRecord(llvm::codeview::CVType ty); + void mergeTypeRecord(TypeIndex curIndex, llvm::codeview::CVType ty); // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex // of the first record in this source, typically 0x1000. When PCHs are @@ -164,7 +164,7 @@ class TpiSource { /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function /// type index here. Both indices are PDB indices, not object type indexes. - llvm::DenseMap funcIdToType; + std::vector> funcIdToType; /// Indicates if a type record is an item index or a type index. llvm::BitVector isItemIndex; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 21a1341f78443..ae2dc9afca280 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -334,8 +334,8 @@ static void translateIdSymbols(MutableArrayRef &recordData, // in both cases we just need the second type index. if (!ti->isSimple() && !ti->isNoneType()) { if (config->debugGHashes) { - auto idToType = source->funcIdToType.find(*ti); - if (idToType == source->funcIdToType.end()) { + auto idToType = tMerger.funcIdToType.find(*ti); + if (idToType == tMerger.funcIdToType.end()) { warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item " "index {1:X} which is not a LF_[M]FUNC_ID record", source->file->getName(), ti->getIndex())); diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h index be877cfda6e6b..72fd5fc72b011 100644 --- a/lld/COFF/TypeMerger.h +++ b/lld/COFF/TypeMerger.h @@ -45,6 +45,10 @@ class TypeMerger { /// indices in each TpiSource. void mergeTypesWithGHash(); + /// Map from PDB function id type indexes to PDB function type indexes. + /// Populated after mergeTypesWithGHash. + llvm::DenseMap funcIdToType; + /// Type records that will go into the PDB TPI stream. llvm::codeview::MergingTypeTableBuilder typeTable; From d12ae042e17b27ebc8d2b5ae3d8dd5f88384d093 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 1 Oct 2020 12:00:18 -0700 Subject: [PATCH 309/544] [lit] Fix Python 2/3 compat in new winreg search code This should fix the test failures on the clang win64 bot: http://lab.llvm.org:8011/builders/clang-x64-windows-msvc/builds/18830 It has been red since Sept 23-ish. This was subtle to debug. Windows has 'find' and 'sort' utilities in C:\Windows\system32, but they don't support all the same flags as the coreutils programs. I configured the buildbot above with Python 2.7 64-bit (hey, it was set up in 2016). When I installed git for Windows, I opted to add all the Unix utilities that come with git to the system PATH. This is *almost* enough to make the LLVM tests pass, but not quite, because if you use the system PATH, the Windows version of find and sort come first, but the tests that use diff, cmp, etc, will all pass. So only a handful of tests will fail, and with cryptic error messages. The code changed in this CL doesn't work with Python 2. Before Python 3.2, the winreg.OpenKey function did not accept the `access=` keyword argument, the caller was required to pass an unused `reserved` positional argument of 0. The try/except/pass around the OpenKey operation masked this usage error in Python 2. Further, the result of the registry operation has to be converted from unicode to add it to the environment, but that was incidental. --- llvm/utils/lit/lit/llvm/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 1dbed8e705379..c8013945e3f93 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -133,7 +133,8 @@ def _find_git_windows_unix_tools(self, tools_needed): hives = [winreg.HKEY_LOCAL_MACHINE, winreg.HKEY_CURRENT_USER] for mask, hive in itertools.product(masks, hives): try: - with winreg.OpenKey(hive, r"SOFTWARE\GitForWindows", access=winreg.KEY_READ | mask) as key: + with winreg.OpenKey(hive, r"SOFTWARE\GitForWindows", 0, + winreg.KEY_READ | mask) as key: install_root, _ = winreg.QueryValueEx(key, 'InstallPath') if not install_root: @@ -143,7 +144,7 @@ def _find_git_windows_unix_tools(self, tools_needed): continue # We found it, stop enumerating. - return candidate_path + return lit.util.to_string(candidate_path) except: continue @@ -168,7 +169,7 @@ def norm(x): paths = [] # If we are passed a list [a b c], then iterating this list forwards - # and adding each to the beginning would result in b c a. So we + # and adding each to the beginning would result in c b a. So we # need to iterate in reverse to end up with the original ordering. for p in reversed(paths_to_add): # Move it to the front if it already exists, otherwise insert it at the From de47e7122f69d56399c4f8864ba279e5ce635970 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Thu, 1 Oct 2020 12:21:01 -0700 Subject: [PATCH 310/544] [CMake][Fuchsia] Don't set WIN32 API, rely on autodetection We prefer autodetection here to avoid persisting this configuration in the generated __config header which is shared across targets. Differential Revision: https://reviews.llvm.org/D88694 --- clang/cmake/caches/Fuchsia-stage2.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index e00b64073ca52..98db622ba34b3 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -84,7 +84,6 @@ if(WIN32) set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Windows CACHE STRING "") set(RUNTIMES_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") set(RUNTIMES_${target}_LIBCXX_ABI_VERSION 2 CACHE STRING "") - set(RUNTIMES_${target}_LIBCXX_HAS_WIN32_THREAD_API ON CACHE BOOL "") set(RUNTIMES_${target}_LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY OFF CACHE BOOL "") set(RUNTIMES_${target}_LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "") set(RUNTIMES_${target}_LIBCXX_ENABLE_ABI_LINKER_SCRIPT OFF CACHE BOOL "") From 88f2fe5cad6cc3a3830448cb8a88b52ee449f2d1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 1 Oct 2020 12:36:08 -0700 Subject: [PATCH 311/544] Raland D87318 [LLD][PowerPC] Add support for R_PPC64_GOT_TLSGD_PCREL34 used in TLS General Dynamic Add Thread Local Storage support for the 34 bit relocation R_PPC64_GOT_TLSGD_PCREL34 used in General Dynamic. The compiler will produce code that looks like: ``` pla r3, x@got@tlsgd@pcrel R_PPC64_GOT_TLSGD_PCREL34 bl __tls_get_addr@notoc(x@tlsgd) R_PPC64_TLSGD R_PPC64_REL24_NOTOC ``` LLD should be able to correctly compute the relocation for R_PPC64_GOT_TLSGD_PCREL34 as well as do the following two relaxations where possible: General Dynamic to Local Exec: ``` paddi r3, r13, x@tprel nop ``` and General Dynamic to Initial Exec: ``` pld r3, x@got@tprel@pcrel add r3, r3, r13 ``` Note: This patch adds support for the PC Relative (no TOC) version of General Dynamic on top of the existing support for the TOC version of General Dynamic. The ABI does not provide any way to tell by looking only at the relocation `R_PPC64_TLSGD` when it is being used in a TOC instruction sequence or and when it is being used in a no TOC sequence. The TOC sequence should always be 4 byte aligned. This patch adds one to the offset of the relocation when it is being used in a no TOC sequence. In this way LLD can tell by looking at the alignment of the offset of `R_PPC64_TLSGD` whether or not it is being used as part of a TOC or no TOC sequence. Reviewed By: NeHuang, sfertile, MaskRay Differential Revision: https://reviews.llvm.org/D87318 --- lld/ELF/Arch/PPC64.cpp | 76 +++++++++++++++++++++---- lld/ELF/Relocations.cpp | 13 +++++ lld/test/ELF/ppc64-tls-pcrel-gd.s | 94 +++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 lld/test/ELF/ppc64-tls-pcrel-gd.s diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 06dd863f31b2a..2e7b20d46cb09 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -727,15 +727,38 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; - case R_PPC64_TLSGD: - write32(loc, NOP); - write32(loc + 4, 0x38630000); // addi r3, r3 - // Since we are relocating a half16 type relocation and Loc + 4 points to - // the start of an instruction we need to advance the buffer by an extra - // 2 bytes on BE. - relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), - R_PPC64_TPREL16_LO, val); + case R_PPC64_GOT_TLSGD_PCREL34: + // Relax from paddi r3, 0, x@got@tlsgd@pcrel, 1 to + // paddi r3, r13, x@tprel, 0 + writePrefixedInstruction(loc, 0x06000000386d0000); + relocateNoSym(loc, R_PPC64_TPREL34, val); + break; + case R_PPC64_TLSGD: { + // PC Relative Relaxation: + // Relax from bl __tls_get_addr@notoc(x@tlsgd) to + // nop + // TOC Relaxation: + // Relax from bl __tls_get_addr(x@tlsgd) + // nop + // to + // nop + // addi r3, r3, x@tprel@l + const uintptr_t locAsInt = reinterpret_cast(loc); + if (locAsInt % 4 == 0) { + write32(loc, NOP); // nop + write32(loc + 4, 0x38630000); // addi r3, r3 + // Since we are relocating a half16 type relocation and Loc + 4 points to + // the start of an instruction we need to advance the buffer by an extra + // 2 bytes on BE. + relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, val); + } else if (locAsInt % 4 == 1) { + write32(loc - 1, NOP); + } else { + errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment"); + } break; + } default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } @@ -947,6 +970,8 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSGD16_LO: return R_TLSGD_GOT; + case R_PPC64_GOT_TLSGD_PCREL34: + return R_TLSGD_PC; case R_PPC64_GOT_TLSLD16: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_GOT_TLSLD16_HI: @@ -1261,6 +1286,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { break; case R_PPC64_PCREL34: case R_PPC64_GOT_PCREL34: + case R_PPC64_GOT_TLSGD_PCREL34: case R_PPC64_GOT_TPREL_PCREL34: case R_PPC64_TPREL34: { const uint64_t si0Mask = 0x00000003ffff0000; @@ -1340,7 +1366,8 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000) return R_PPC64_RELAX_GOT_PC; } - if (expr == R_RELAX_TLS_GD_TO_IE) + + if (type != R_PPC64_GOT_TLSGD_PCREL34 && expr == R_RELAX_TLS_GD_TO_IE) return R_RELAX_TLS_GD_TO_IE_GOT_OFF; if (expr == R_RELAX_TLS_LD_TO_LE) return R_RELAX_TLS_LD_TO_LE_ABS; @@ -1381,10 +1408,35 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } - case R_PPC64_TLSGD: - write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop - write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 + case R_PPC64_GOT_TLSGD_PCREL34: { + // Relax from paddi r3, 0, sym@got@tlsgd@pcrel, 1 to + // pld r3, sym@got@tprel@pcrel + writePrefixedInstruction(loc, 0x04100000e4600000); + relocateNoSym(loc, R_PPC64_GOT_TPREL_PCREL34, val); + return; + } + case R_PPC64_TLSGD: { + // PC Relative Relaxation: + // Relax from bl __tls_get_addr@notoc(x@tlsgd) to + // nop + // TOC Relaxation: + // Relax from bl __tls_get_addr(x@tlsgd) + // nop + // to + // nop + // add r3, r3, r13 + const uintptr_t locAsInt = reinterpret_cast(loc); + if (locAsInt % 4 == 0) { + write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop + write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13 + } else if (locAsInt % 4 == 1) { + // bl __tls_get_addr(sym@tlsgd) --> add r3, r3, r13 + write32(loc - 1, 0x7c636a14); + } else { + errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment"); + } return; + } default: llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 4c6a70d9034e9..ea6aa3c6a12a4 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1357,6 +1357,19 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (type == R_PPC64_TOC16_LO && sym.isSection() && isa(sym) && cast(sym).section->name == ".toc") ppc64noTocRelax.insert({&sym, addend}); + + if (type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) { + if (i == end) { + errorOrWarn("R_PPC64_TLSGD may not be the last relocation" + + getLocation(sec, sym, offset)); + return; + } + + // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC case, + // so we can discern it later from the toc-case. + if (i->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC) + ++offset; + } } // Relax relocations. diff --git a/lld/test/ELF/ppc64-tls-pcrel-gd.s b/lld/test/ELF/ppc64-tls-pcrel-gd.s new file mode 100644 index 0000000000000..6dc8b80396e41 --- /dev/null +++ b/lld/test/ELF/ppc64-tls-pcrel-gd.s @@ -0,0 +1,94 @@ +# REQUIRES: ppc +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/asm -o %t.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t-defs.o +# RUN: ld.lld --shared %t-defs.o --soname=t-defs -o %t-defs.so +# RUN: ld.lld -T %t/lds --shared %t.o -o %t-gd.so +# RUN: ld.lld -T %t/lds %t.o %t-defs.so -o %t-gdtoie +# RUN: ld.lld -T %t/lds %t.o %t-defs.o -o %t-gdtole + +# RUN: llvm-readelf -r %t-gd.so | FileCheck %s --check-prefix=GD-RELOC +# RUN: llvm-readelf -s %t-gd.so | FileCheck %s --check-prefix=GD-SYM +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gd.so | FileCheck %s --check-prefix=GD + +# RUN: llvm-readelf -r %t-gdtoie | FileCheck %s --check-prefix=GDTOIE-RELOC +# RUN: llvm-readelf -s %t-gdtoie | FileCheck %s --check-prefix=GDTOIE-SYM +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gdtoie | FileCheck %s --check-prefix=GDTOIE + +# RUN: llvm-readelf -r %t-gdtole | FileCheck %s --check-prefix=GDTOLE-RELOC +# RUN: llvm-readelf -s %t-gdtole | FileCheck %s --check-prefix=GDTOLE-SYM +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t-gdtole | FileCheck %s --check-prefix=GDTOLE + +## This test checks the General Dynamic PC Relative TLS implementation for lld. +## GD - General Dynamic with no relaxation possible +## GDTOIE - General Dynamic relaxed to Initial Exec +## GDTOLE - General Dynamic relaxed to Local Exec + +#--- lds +SECTIONS { + .text_addr 0x1001000 : { *(.text_addr) } +} + +#--- defs +.section .tbss,"awT",@nobits +.globl x +x: + .long 0 +.globl y +y: + .long 0 + +#--- asm + +# GD-RELOC: Relocation section '.rela.dyn' at offset 0x100b8 contains 4 entries: +# GD-RELOC: 0000000001001160 0000000200000044 R_PPC64_DTPMOD64 0000000000000000 x + 0 +# GD-RELOC: 0000000001001168 000000020000004e R_PPC64_DTPREL64 0000000000000000 x + 0 +# GD-RELOC: 0000000001001170 0000000300000044 R_PPC64_DTPMOD64 0000000000000000 y + 0 +# GD-RELOC: 0000000001001178 000000030000004e R_PPC64_DTPREL64 0000000000000000 y + 0 + +# GD-SYM: Symbol table '.dynsym' contains 4 entries: +# GD-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x +# GD-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT UND y + + +# GDTOIE-RELOC: Relocation section '.rela.dyn' at offset 0x{{.*}} contains 2 entries: +# GDTOIE-RELOC: 00000000010010e0 0000000200000049 R_PPC64_TPREL64 0000000000000000 x + 0 +# GDTOIE-RELOC: 00000000010010e8 0000000300000049 R_PPC64_TPREL64 0000000000000000 y + 0 + +# GDTOIE-SYM: Symbol table '.dynsym' contains 4 entries: +# GDTOIE-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x +# GDTOIE-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT UND y + + +# GDTOLE-RELOC: There are no relocations in this file. + +# GDTOLE-SYM: Symbol table '.symtab' contains 5 entries: +# GDTOLE-SYM: 3: 0000000000000000 0 TLS GLOBAL DEFAULT 3 x +# GDTOLE-SYM: 4: 0000000000000004 0 TLS GLOBAL DEFAULT 3 y + +# GD-LABEL: : +# GD-NEXT: paddi 3, 0, 352, 1 +# GD-NEXT: bl +# GD-NEXT: paddi 3, 0, 356, 1 +# GD-NEXT: bl +# GD-NEXT: blr +# GDTOIE-LABEL: : +# GDTOIE-NEXT: pld 3, 224(0), 1 +# GDTOIE-NEXT: add 3, 3, 13 +# GDTOIE-NEXT: pld 3, 220(0), 1 +# GDTOIE-NEXT: add 3, 3, 13 +# GDTOIE-NEXT: blr +# GDTOLE-LABEL: : +# GDTOLE-NEXT: paddi 3, 13, -28672, 0 +# GDTOLE-NEXT: nop +# GDTOLE-NEXT: paddi 3, 13, -28668, 0 +# GDTOLE-NEXT: nop +# GDTOLE-NEXT: blr +.section .text_addr, "ax", %progbits +GDTwoVal: + paddi 3, 0, x@got@tlsgd@pcrel, 1 + bl __tls_get_addr@notoc(x@tlsgd) + paddi 3, 0, y@got@tlsgd@pcrel, 1 + bl __tls_get_addr@notoc(y@tlsgd) + blr From 35ecc7fe49ba881a77e8146b51870a60a52b211f Mon Sep 17 00:00:00 2001 From: Hubert Tong Date: Thu, 1 Oct 2020 15:46:26 -0400 Subject: [PATCH 312/544] [clang][Sema] Fix PR47676: Handle dependent AltiVec C-style cast Fix premature decision in the presence of type-dependent expression operands on whether AltiVec vector initializations from single expressions are "splat" operations. Verify that the instantiation is able to determine the correct cast semantics for both the scalar type and the vector type case. Note that, because the change only affects the single-expression case (and the target type is an AltiVec-style vector type), the replacement of a parenthesized list with a parenthesized expression does not change the semantics of the program in a program-observable manner. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D88526 --- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/SemaTemplate/pr47676.cpp | 38 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaTemplate/pr47676.cpp diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 22840dd3dfe3c..e51b276261849 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -7408,7 +7408,7 @@ Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc, } if (PE || PLE->getNumExprs() == 1) { Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0)); - if (!E->getType()->isVectorType()) + if (!E->isTypeDependent() && !E->getType()->isVectorType()) isVectorLiteral = true; } else diff --git a/clang/test/SemaTemplate/pr47676.cpp b/clang/test/SemaTemplate/pr47676.cpp new file mode 100644 index 0000000000000..428607097c960 --- /dev/null +++ b/clang/test/SemaTemplate/pr47676.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple=powerpc64le-unknown-linux-gnu \ +// RUN: -target-feature +altivec -fsyntax-only -ast-dump \ +// RUN: -xc++ < %s 2>&1 \ +// RUN: | FileCheck %s + +// Ensures that casts to AltiVec type with a dependent expression operand does +// not hit the assertion failure reported in PR47676. Further checks that casts +// to AltiVec type with a dependent expression operand is, on instantiation, +// able to correctly differentiate between a splat case and a bitcast case. +template void f(T *tp) { + extern void g(int, ...); + g(0, (__vector int)(*tp)); + g(0, (__vector int)*tp); +} + +void g(void) { + f<__vector float>(nullptr); +// CHECK: | |-FunctionDecl {{.*}} f 'void (__vector float *)' + +// CHECK: | | `-CStyleCastExpr {{.*}} '__vector int' +// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} '__vector int' +// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}}'__vector float' + +// CHECK: | `-CStyleCastExpr {{.*}} '__vector int' +// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '__vector int' +// CHECK-NEXT: | `-ImplicitCastExpr {{.*}}'__vector float' + + f(nullptr); +// CHECK: | `-FunctionDecl {{.*}} f 'void (double *)' + +// CHECK: | | `-CStyleCastExpr {{.*}} '__vector int' +// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}}'double' + +// CHECK: | `-CStyleCastExpr {{.*}} '__vector int' +// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | `-ImplicitCastExpr {{.*}}:'double' +} From 4c265ce665630b74ad9f25f67cd2114714b9aaab Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 10:45:14 -0700 Subject: [PATCH 313/544] [AArch64][GlobalISel] Camp oversize v4s64 G_FPEXT operations. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 6 ++- .../AArch64/GlobalISel/legalize-fpext.mir | 41 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 4ca821322a918..3311cc37f176e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -421,8 +421,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor( {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) .clampMaxNumElements(0, s32, 2); - getActionDefinitionsBuilder(G_FPEXT).legalFor( - {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}); + getActionDefinitionsBuilder(G_FPEXT) + .legalFor( + {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) + .clampMaxNumElements(0, s64, 2); // Conversions getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir new file mode 100644 index 0000000000000..11d9b2624f9f9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir @@ -0,0 +1,41 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -march=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s +--- +name: fpext_v4s64_v4s32 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $q0, $x0 + + ; CHECK-LABEL: name: fpext_v4s64_v4s32 + ; CHECK: liveins: $q0, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[FPEXT:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT]](<2 x s64>) + ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>) + ; CHECK: G_STORE [[UV2]](s64), [[COPY1]](p0) :: (store 8, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: G_STORE [[UV3]](s64), [[PTR_ADD]](p0) :: (store 8 + 8) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[UV4]](s64), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: G_STORE [[UV5]](s64), [[PTR_ADD2]](p0) :: (store 8 + 24) + ; CHECK: RET_ReallyLR + %0:_(<4 x s32>) = COPY $q0 + %1:_(p0) = COPY $x0 + %2:_(<4 x s64>) = G_FPEXT %0(<4 x s32>) + G_STORE %2(<4 x s64>), %1(p0) :: (store 32) + RET_ReallyLR + +... From 73457536ff335a2cbe2381354512e0fcf9d703fd Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 23 Sep 2020 15:52:49 -0700 Subject: [PATCH 314/544] [AArch64][GlobalISel] Use custom legalization for G_TRUNC for v8i8 vectors. Truncating to v8i8 is a case where we want to split the source but also generate intermediate truncates to reduce the size of the source vector before truncating down to v8i8. This implements the same strategy as what SelectionDAG does, but I'm not certain where if anywhere in generic code it should live. Use it for legalization of v8s8 = G_ICMP v8s32. Differential Revision: https://reviews.llvm.org/D88191 --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 64 +++++++++++++++- .../AArch64/GISel/AArch64LegalizerInfo.h | 2 + .../GlobalISel/legalize-vector-icmp.mir | 76 +++++++++++++++++++ 3 files changed, 140 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 3311cc37f176e..ffa49ad15b4c8 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" #include +#include "llvm/Support/MathExtras.h" #define DEBUG_TYPE "aarch64-legalinfo" @@ -373,7 +374,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalarOrEltIf( [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, s64) - .widenScalarOrEltToNextPow2(1); + .widenScalarOrEltToNextPow2(1) + .clampNumElements(0, v2s32, v4s32); getActionDefinitionsBuilder(G_FCMP) .legalFor({{s32, s32}, {s32, s64}}) @@ -412,7 +414,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf(ExtLegalFunc) .clampScalar(0, s64, s64); // Just for s128, others are handled above. - getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); + getActionDefinitionsBuilder(G_TRUNC) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, + 0, s8) + .customIf([=](const LegalityQuery &Query) { + LLT DstTy = Query.Types[0]; + LLT SrcTy = Query.Types[1]; + return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; + }) + .alwaysLegal(); getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); @@ -709,11 +720,60 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); case TargetOpcode::G_GLOBAL_VALUE: return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer); + case TargetOpcode::G_TRUNC: + return legalizeVectorTrunc(MI, Helper); } llvm_unreachable("expected switch to return"); } +static void extractParts(Register Reg, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, + SmallVectorImpl &VRegs) { + for (int I = 0; I < NumParts; ++I) + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + MIRBuilder.buildUnmerge(VRegs, Reg); +} + +bool AArch64LegalizerInfo::legalizeVectorTrunc( + MachineInstr &MI, LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + // Similar to how operand splitting is done in SelectiondDAG, we can handle + // %res(v8s8) = G_TRUNC %in(v8s32) by generating: + // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) + // %lo16(<4 x s16>) = G_TRUNC %inlo + // %hi16(<4 x s16>) = G_TRUNC %inhi + // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 + // %res(<8 x s8>) = G_TRUNC %in16 + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + assert(isPowerOf2_32(DstTy.getSizeInBits()) && + isPowerOf2_32(SrcTy.getSizeInBits())); + + // Split input type. + LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); + // First, split the source into two smaller vectors. + SmallVector SplitSrcs; + extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); + + // Truncate the splits into intermediate narrower elements. + LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); + for (unsigned I = 0; I < SplitSrcs.size(); ++I) + SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); + + auto Concat = MIRBuilder.buildConcatVectors( + DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); + + Helper.Observer.changingInstr(MI); + MI.getOperand(1).setReg(Concat.getReg(0)); + Helper.Observer.changedInstr(MI); + return true; +} + bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 1cb24559c1abf..8217e37c85128 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" namespace llvm { @@ -45,6 +46,7 @@ class AArch64LegalizerInfo : public LegalizerInfo { bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; + bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir index ce078624a9828..ec0446d0236fb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir @@ -1920,3 +1920,79 @@ body: | RET_ReallyLR implicit $d0 ... +--- +name: icmp_8xs1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } + - { reg: '$q2' } + - { reg: '$q3' } +body: | + bb.1: + liveins: $q0, $q1, $q2, $q3 + + ; CHECK-LABEL: name: icmp_8xs1 + ; CHECK: liveins: $q0, $q1, $q2, $q3 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %2:_(<4 x s32>) = COPY $q0 + %3:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>) + %4:_(<4 x s32>) = COPY $q2 + %5:_(<4 x s32>) = COPY $q3 + %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>) + %6:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s32>), %1 + %7:_(<8 x s8>) = G_ANYEXT %6(<8 x s1>) + $d0 = COPY %7(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: icmp_8xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } + - { reg: '$q2' } + - { reg: '$q3' } +body: | + bb.1: + liveins: $q0, $q1, $q2, $q3 + + ; CHECK-LABEL: name: icmp_8xs32 + ; CHECK: liveins: $q0, $q1, $q2, $q3 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %2:_(<4 x s32>) = COPY $q0 + %3:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>) + %4:_(<4 x s32>) = COPY $q2 + %5:_(<4 x s32>) = COPY $q3 + %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>) + %6:_(<8 x s32>) = G_ICMP intpred(eq), %0(<8 x s32>), %1 + %7:_(<8 x s8>) = G_TRUNC %6(<8 x s32>) + $d0 = COPY %7(<8 x s8>) + RET_ReallyLR implicit $d0 +... From 1c1a8105580784c96212db1afc097a844740bc69 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Thu, 1 Oct 2020 22:31:30 +0200 Subject: [PATCH 315/544] libclc: Use find_package to find Python 3 and require it The script's shebang wants Python 3, so we use FindPython3. The original code didn't work when an unversioned python was not available. This is explicitly allowed in PEP 394. ("Distributors may choose to set the behavior of the python command as follows: python2, python3, not provide python command, allow python to be configurable by an end user or a system administrator.") Also I think it's actually required, so let the configuration fail if we can't find it. Lastly remove the shebang, since the script is only run via interpreter and doesn't have the executable bit set anyway. Reviewed By: jvesely Differential Revision: https://reviews.llvm.org/D88366 --- libclc/CMakeLists.txt | 4 ++-- libclc/generic/lib/gen_convert.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 1a77a378e192e..b8b5ceff086cc 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -184,11 +184,11 @@ if( ENABLE_RUNTIME_SUBNORMAL ) DESTINATION ${CMAKE_INSTALL_DATADIR}/clc ) endif() -find_program( PYTHON python ) +find_package( Python3 REQUIRED COMPONENTS Interpreter ) file( TO_CMAKE_PATH ${CMAKE_SOURCE_DIR}/generic/lib/gen_convert.py script_loc ) add_custom_command( OUTPUT convert.cl - COMMAND ${PYTHON} ${script_loc} > convert.cl + COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert.cl DEPENDS ${script_loc} ) add_custom_target( "generate_convert.cl" DEPENDS convert.cl ) diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py index 5c87fcbe1aba4..7e649faa7dfcb 100644 --- a/libclc/generic/lib/gen_convert.py +++ b/libclc/generic/lib/gen_convert.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - # OpenCL built-in library: type conversion functions # # Copyright (c) 2013 Victor Oliveira From b29573b672d795dfc58aaf70c70511229584e3c3 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 29 Sep 2020 16:06:32 -0700 Subject: [PATCH 316/544] [gn build] Support building with ThinLTO Differential Revision: https://reviews.llvm.org/D88584 --- llvm/utils/gn/build/BUILD.gn | 21 +++++++++++++++++++++ llvm/utils/gn/build/buildflags.gni | 18 ++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn index 373d371f017bd..f88f4bcc4b6b3 100644 --- a/llvm/utils/gn/build/BUILD.gn +++ b/llvm/utils/gn/build/BUILD.gn @@ -256,6 +256,27 @@ config("compiler_defaults") { cflags += [ "-fsanitize=thread" ] ldflags += [ "-fsanitize=thread" ] } + + if (use_thinlto) { + assert(is_clang, "ThinLTO only supported on Clang") + + lto_opt_level = 2 + + cflags += [ "-flto=thin" ] + + if (host_os == "win") { + ldflags += [ + "/opt:lldlto=" + lto_opt_level, + "/opt:lldltojobs=" + max_jobs_per_lto_link, + ] + } else { + ldflags += [ + "-flto=thin", + "-Wl,--thinlto-jobs=" + max_jobs_per_lto_link, + "-Wl,--lto-O" + lto_opt_level, + ] + } + } } config("no_exceptions") { diff --git a/llvm/utils/gn/build/buildflags.gni b/llvm/utils/gn/build/buildflags.gni index b04eae19a7846..9ad494a3c1e97 100644 --- a/llvm/utils/gn/build/buildflags.gni +++ b/llvm/utils/gn/build/buildflags.gni @@ -10,16 +10,22 @@ declare_args() { # Whether to build with asan. use_asan = false -} - -# args that depend on other args must live in a later declare_args() block. -declare_args() { - # Whether to build with optimizations. - is_optimized = !is_debug # Whether to enable assertions. llvm_enable_assertions = true # Whether to enable expensive checks. llvm_enable_expensive_checks = false + + # Whether to build with ThinLTO. + use_thinlto = false + + # Max jobs per ThinLTO link. + max_jobs_per_lto_link = 8 +} + +# args that depend on other args must live in a later declare_args() block. +declare_args() { + # Whether to build with optimizations. + is_optimized = !is_debug } From 9f6acb13586b0b3b4e83dc03648ced02517bd236 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 13:50:20 -0700 Subject: [PATCH 317/544] [AArch64][GlobalISel] Merge G_SHL, G_ASHR and G_LSHR legalizer rules together. There's no need for any difference between these. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 23 ++----------------- .../AArch64/GlobalISel/legalize-shift.mir | 23 ++++++++++++++++++- .../GlobalISel/legalizer-info-validation.mir | 1 + 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index ffa49ad15b4c8..31dde5d76ac79 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -113,7 +113,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0); - getActionDefinitionsBuilder(G_SHL) + getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) .customIf([=](const LegalityQuery &Query) { const auto &SrcTy = Query.Types[0]; const auto &AmtTy = Query.Types[1]; @@ -153,26 +153,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0) .scalarize(0); - getActionDefinitionsBuilder({G_LSHR, G_ASHR}) - .customIf([=](const LegalityQuery &Query) { - const auto &SrcTy = Query.Types[0]; - const auto &AmtTy = Query.Types[1]; - return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && - AmtTy.getSizeInBits() == 32; - }) - .legalFor({{s32, s32}, - {s32, s64}, - {s64, s64}, - {v2s32, v2s32}, - {v4s32, v4s32}, - {v2s64, v2s64}, - {v16s8, v16s8}, - {v4s16, v4s16}, - {v8s16, v8s16}}) - .clampScalar(1, s32, s64) - .clampScalar(0, s32, s64) - .minScalarSameAs(1, 0); - getActionDefinitionsBuilder({G_SREM, G_UREM}) .lowerFor({s1, s8, s16, s32, s64}); @@ -346,6 +326,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({s32, s64}) .clampScalar(0, s32, s64); + getActionDefinitionsBuilder(G_ICMP) .legalFor({{s32, s32}, {s32, s64}, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index 467f38672b706..8a28012766ef0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -march=aarch64 -run-pass=legalizer %s -o - | FileCheck %s -# RUN: llc -O0 -debugify-and-strip-all-safe -march=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK +# R UN: llc -O0 -debugify-and-strip-all-safe -march=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --- name: test_shift body: | @@ -410,3 +410,24 @@ body: | %2:_(<4 x s16>) = G_ASHR %0, %1 $d0 = COPY %2 ... +--- +name: test_ashr_v8s32 +body: | + bb.0: + ; CHECK-LABEL: name: test_ashr_v8s32 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<4 x s32>) + ; CHECK: [[ASHR1:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]](<4 x s32>) + ; CHECK: $q0 = COPY [[ASHR]](<4 x s32>) + ; CHECK: $q1 = COPY [[ASHR1]](<4 x s32>) + %0:_(<8 x s32>) = G_IMPLICIT_DEF + %1:_(<8 x s32>) = G_IMPLICIT_DEF + %2:_(<8 x s32>) = G_ASHR %0, %1 + %3:_(<4 x s32>), %4:_(<4 x s32>) = G_UNMERGE_VALUES %2 + $q0 = COPY %3(<4 x s32>) + $q1 = COPY %4(<4 x s32>) +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index e4da0beb30229..357eb8b981c50 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -269,6 +269,7 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_LSHR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ASHR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices From 15ea45f16b261521e3251b4ff0bceaadf31a4515 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 1 Oct 2020 23:02:15 +0200 Subject: [PATCH 318/544] [lldb] Skip unique_ptr import-std-module tests on Linux This seems to fail on ubuntu 18.04.5 with Clang 9 due to: Error output: error: Couldn't lookup symbols: std::__1::default_delete::operator()(int) const --- .../unique_ptr-dbg-info-content/TestUniquePtrDbgInfoContent.py | 1 + .../import-std-module/unique_ptr/TestUniquePtrFromStdModule.py | 1 + 2 files changed, 2 insertions(+) diff --git a/lldb/test/API/commands/expression/import-std-module/unique_ptr-dbg-info-content/TestUniquePtrDbgInfoContent.py b/lldb/test/API/commands/expression/import-std-module/unique_ptr-dbg-info-content/TestUniquePtrDbgInfoContent.py index fafb29333924c..9f698af7e1b4a 100644 --- a/lldb/test/API/commands/expression/import-std-module/unique_ptr-dbg-info-content/TestUniquePtrDbgInfoContent.py +++ b/lldb/test/API/commands/expression/import-std-module/unique_ptr-dbg-info-content/TestUniquePtrDbgInfoContent.py @@ -13,6 +13,7 @@ class TestUniquePtrDbgInfoContent(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) + @skipIfLinux # s.reset() causes link errors on ubuntu 18.04/Clang 9 def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/unique_ptr/TestUniquePtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/unique_ptr/TestUniquePtrFromStdModule.py index 7bd3912437346..a3761ade04e30 100644 --- a/lldb/test/API/commands/expression/import-std-module/unique_ptr/TestUniquePtrFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/unique_ptr/TestUniquePtrFromStdModule.py @@ -13,6 +13,7 @@ class TestUniquePtr(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) + @skipIfLinux # s.reset() causes link errors on ubuntu 18.04/Clang 9 def test(self): self.build() From 4140f0744fb2deccb74e77282e23ff731f67821b Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 1 Oct 2020 16:11:00 -0400 Subject: [PATCH 319/544] [LLD][COFF] Fix crash with /summary and PCH input files Before this patch /summary was crashing with some .PCH.OBJ files, because tpiMap[srcIdx++] was reading at the wrong location. When the TpiSource depends on a .PCH.OBJ file, the types should be offset by the previously merged PCH.OBJ set of indices. Differential Revision: https://reviews.llvm.org/D88678 --- lld/COFF/DebugTypes.cpp | 5 +- lld/test/COFF/Inputs/precomp2-a.yaml | 84 +++++++++++++++++++++++++ lld/test/COFF/Inputs/precomp2.yaml | 82 ++++++++++++++++++++++++ lld/test/COFF/precomp-summary-fail.test | 22 +++++++ 4 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 lld/test/COFF/Inputs/precomp2-a.yaml create mode 100644 lld/test/COFF/Inputs/precomp2.yaml create mode 100644 lld/test/COFF/precomp-summary-fail.test diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 4ce031118c82f..baec05d1e87a6 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -319,6 +319,9 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { BinaryStreamReader reader(file->debugTypes, support::little); cantFail(reader.readArray(types, reader.getLength())); + // When dealing with PCH.OBJ, some indices were already merged. + unsigned nbHeadIndices = indexMapStorage.size(); + if (auto err = mergeTypeAndIdRecords( m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature)) fatal("codeview::mergeTypeAndIdRecords failed: " + @@ -335,7 +338,7 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { // collecting statistics. m->tpiCounts.resize(m->getTypeTable().size()); m->ipiCounts.resize(m->getIDTable().size()); - uint32_t srcIdx = 0; + uint32_t srcIdx = nbHeadIndices; for (CVType &ty : types) { TypeIndex dstIdx = tpiMap[srcIdx++]; // Type merging may fail, so a complex source type may become the simple diff --git a/lld/test/COFF/Inputs/precomp2-a.yaml b/lld/test/COFF/Inputs/precomp2-a.yaml new file mode 100644 index 0000000000000..a9d497ba10a3d --- /dev/null +++ b/lld/test/COFF/Inputs/precomp2-a.yaml @@ -0,0 +1,84 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$S' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Subsections: + - !Symbols + Records: + - Kind: S_OBJNAME + ObjNameSym: + Signature: 545589255 + ObjectName: 'D:\llvm-project\lld\test\COFF\Inputs\precomp2-a.obj' + - Kind: S_COMPILE3 + Compile3Sym: + Flags: [ SecurityChecks, HotPatch ] + Machine: X64 + FrontendMajor: 19 + FrontendMinor: 13 + FrontendBuild: 26131 + FrontendQFE: 1 + BackendMajor: 19 + BackendMinor: 13 + BackendBuild: 26131 + BackendQFE: 1 + Version: 'Microsoft (R) Optimizing Compiler' + - !StringTable + Strings: + - 'D:\llvm-project\lld\test\COFF\precomp\precomp.pch' + - 'D:\llvm-project\lld\test\COFF\precomp\precomp.h' + - 'D:\llvm-project\lld\test\COFF\precomp\a.cpp' + - Name: '.debug$T' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Types: + - Kind: LF_PRECOMP + Precomp: + StartTypeIndex: 4096 + TypesCount: 3 + Signature: 545589255 + PrecompFilePath: 'D:\llvm-project\lld\test\COFF\Inputs\precomp2.obj' + - Kind: LF_STRING_ID + StringId: + Id: 0 + String: 'test test test test test' + - Kind: LF_STRING_ID + StringId: + Id: 0 + String: 'test test test test test' + - Kind: LF_STRING_ID + StringId: + Id: 0 + String: 'test test test test test' + - Kind: LF_BUILDINFO + BuildInfo: + ArgIndices: [ 4101, 4101, 4101, 4101, 4101 ] +symbols: + - Name: '.debug$S' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.debug$T' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 +... diff --git a/lld/test/COFF/Inputs/precomp2.yaml b/lld/test/COFF/Inputs/precomp2.yaml new file mode 100644 index 0000000000000..7a4ec2f25af4e --- /dev/null +++ b/lld/test/COFF/Inputs/precomp2.yaml @@ -0,0 +1,82 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$S' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Subsections: + - !Symbols + Records: + - Kind: S_OBJNAME + ObjNameSym: + Signature: 545589255 + ObjectName: 'D:\llvm-project\lld\test\COFF\Inputs\precomp2.obj' + - Kind: S_COMPILE3 + Compile3Sym: + Flags: [ SecurityChecks, HotPatch ] + Machine: X64 + FrontendMajor: 19 + FrontendMinor: 13 + FrontendBuild: 26131 + FrontendQFE: 1 + BackendMajor: 19 + BackendMinor: 13 + BackendBuild: 26131 + BackendQFE: 1 + Version: 'Microsoft (R) Optimizing Compiler' + - !StringTable + Strings: + - 'D:\llvm-project\lld\test\COFF\precomp\precomp.pch' + - Name: '.debug$P' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + PrecompTypes: + - Kind: LF_STRUCTURE + Class: + MemberCount: 0 + Options: [ None, ForwardReference, HasUniqueName ] + FieldList: 0 + Name: _s__CatchableType + UniqueName: '.?AU_s__CatchableType@@' + DerivationList: 0 + VTableShape: 0 + Size: 0 + - Kind: LF_MODIFIER + Modifier: + ModifiedType: 4096 + Modifiers: [ None, Const ] + - Kind: LF_POINTER + Pointer: + ReferentType: 4096 + Attrs: 65548 + - Kind: LF_ENDPRECOMP + EndPrecomp: + Signature: 545589255 +symbols: + - Name: '.debug$S' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: '.debug$P' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 +... diff --git a/lld/test/COFF/precomp-summary-fail.test b/lld/test/COFF/precomp-summary-fail.test new file mode 100644 index 0000000000000..b689839be9d22 --- /dev/null +++ b/lld/test/COFF/precomp-summary-fail.test @@ -0,0 +1,22 @@ + +The input files were tailored so that we end up with a resulting IPI stream +smaller than the TPI stream, which would previously trigger a crash with +/summary. + +RUN: rm -rf %t && mkdir %t +RUN: yaml2obj < %S/Inputs/precomp2.yaml -o %t\precomp2.obj +RUN: yaml2obj < %S/Inputs/precomp2-a.yaml -o %t\precomp2-a.obj +RUN: lld-link %t\precomp2-a.obj %t\precomp2.obj /nodefaultlib /noentry \ +RUN: /dll /out:%t.dll /debug /summary | FileCheck %s -check-prefix SUMMARY + +SUMMARY: Summary +SUMMARY-NEXT: -------------------------------------------------------------------------------- +SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) +SUMMARY-NEXT: 0 PDB type server dependencies +SUMMARY-NEXT: 1 Precomp OBJ dependencies +SUMMARY-NEXT: 3 Merged TPI records +SUMMARY-NEXT: 2 Merged IPI records +SUMMARY-NEXT: 1 Output PDB strings +SUMMARY-NEXT: 0 Global symbol records +SUMMARY-NEXT: 4 Module symbol records +SUMMARY-NEXT: 0 Public symbol records From 8071c2f5c6149d0dc976819002dc46d9e7edfa40 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 14:04:54 -0700 Subject: [PATCH 320/544] [AArch64][GlobalISel] Make <8 x s8> shifts legal. --- .../GISel/AArch64InstructionSelector.cpp | 272 +++++++++--------- .../AArch64/GlobalISel/legalize-shift.mir | 43 +++ 2 files changed, 179 insertions(+), 136 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 1daa2b29b9d54..22e21b4bf0827 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -34,8 +34,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Type.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -101,8 +101,7 @@ class AArch64InstructionSelector : public InstructionSelector { bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool tryOptAndIntoCompareBranch(MachineInstr *LHS, - int64_t CmpConstant, + bool tryOptAndIntoCompareBranch(MachineInstr *LHS, int64_t CmpConstant, const CmpInst::Predicate &Pred, MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; @@ -313,10 +312,11 @@ class AArch64InstructionSelector : public InstructionSelector { /// Returns a \p ComplexRendererFns which contains a base, offset, and whether /// or not a shift + extend should be folded into an addressing mode. Returns /// None when this is not profitable or possible. - ComplexRendererFns - selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, - MachineOperand &Offset, unsigned SizeInBytes, - bool WantsExt) const; + ComplexRendererFns selectExtendedSHL(MachineOperand &Root, + MachineOperand &Base, + MachineOperand &Offset, + unsigned SizeInBytes, + bool WantsExt) const; ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, unsigned SizeInBytes) const; @@ -360,7 +360,7 @@ class AArch64InstructionSelector : public InstructionSelector { /// subregister copy if necessary. Return either ExtReg, or the result of the /// new copy. Register narrowExtendRegIfNeeded(Register ExtReg, - MachineIRBuilder &MIB) const; + MachineIRBuilder &MIB) const; Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size, MachineIRBuilder &MIB) const; ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; @@ -1423,7 +1423,8 @@ static Optional getVectorShiftImm(Register Reg, // Check all operands are identical immediates. int64_t ImmVal = 0; for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) { - auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); + auto VRegAndVal = + getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); if (!VRegAndVal) return None; @@ -1438,7 +1439,8 @@ static Optional getVectorShiftImm(Register Reg, /// Matches and returns the shift immediate value for a SHL instruction given /// a shift operand. -static Optional getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { +static Optional getVectorSHLImm(LLT SrcTy, Register Reg, + MachineRegisterInfo &MRI) { Optional ShiftImm = getVectorShiftImm(Reg, MRI); if (!ShiftImm) return None; @@ -1498,6 +1500,8 @@ bool AArch64InstructionSelector::selectVectorSHL( Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; } else if (Ty == LLT::vector(16, 8)) { Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); return false; @@ -1557,6 +1561,9 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( } else if (Ty == LLT::vector(16, 8)) { Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; NegOpc = AArch64::NEGv8i16; + } else if (Ty == LLT::vector(8, 8)) { + Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; + NegOpc = AArch64::NEGv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); return false; @@ -1603,8 +1610,9 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } -void AArch64InstructionSelector::materializeLargeCMVal( - MachineInstr &I, const Value *V, unsigned OpFlags) const { +void AArch64InstructionSelector::materializeLargeCMVal(MachineInstr &I, + const Value *V, + unsigned OpFlags) const { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1635,8 +1643,8 @@ void AArch64InstructionSelector::materializeLargeCMVal( constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); return DstReg; }; - Register DstReg = BuildMovK(MovZ.getReg(0), - AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); + Register DstReg = + BuildMovK(MovZ.getReg(0), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); return; @@ -1707,8 +1715,8 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { /// because the selector works bottom up, uses before defs. By the time we /// end up trying to select a G_PTR_ADD, we should have already attempted to /// fold this into addressing modes and were therefore unsuccessful. -bool AArch64InstructionSelector::convertPtrAddToAdd( - MachineInstr &I, MachineRegisterInfo &MRI) { +bool AArch64InstructionSelector::convertPtrAddToAdd(MachineInstr &I, + MachineRegisterInfo &MRI) { assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); Register DstReg = I.getOperand(0).getReg(); Register AddOp1Reg = I.getOperand(1).getReg(); @@ -1886,7 +1894,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { // Certain non-generic instructions also need some special handling. - if (Opcode == TargetOpcode::LOAD_STACK_GUARD) + if (Opcode == TargetOpcode::LOAD_STACK_GUARD) return constrainSelectedInstRegOperands(I, TII, TRI, RBI); if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { @@ -1894,10 +1902,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const LLT DefTy = MRI.getType(DefReg); const RegClassOrRegBank &RegClassOrBank = - MRI.getRegClassOrRegBank(DefReg); + MRI.getRegClassOrRegBank(DefReg); - const TargetRegisterClass *DefRC - = RegClassOrBank.dyn_cast(); + const TargetRegisterClass *DefRC = + RegClassOrBank.dyn_cast(); if (!DefRC) { if (!DefTy.isValid()) { LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); @@ -1922,7 +1930,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return true; } - if (I.getNumOperands() != I.getNumExplicitOperands()) { LLVM_DEBUG( dbgs() << "Generic instruction has unexpected implicit operands\n"); @@ -1998,10 +2005,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { .addUse(CondReg) .addImm(1); constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI); - auto Bcc = - BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) - .addImm(AArch64CC::EQ) - .addMBB(DestMBB); + auto Bcc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) + .addImm(AArch64CC::EQ) + .addMBB(DestMBB); I.eraseFromParent(); return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI); @@ -2375,8 +2381,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (NewOpc == I.getOpcode()) return nullptr; // Check if we can fold anything into the addressing mode. - auto AddrModeFns = - selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); + auto AddrModeFns = selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); if (!AddrModeFns) { // Can't fold anything. Use the original instruction. I.setDesc(TII.get(NewOpc)); @@ -2536,10 +2541,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // to the G_UADDO. CSINC increments the result when the predicate is false, // so to get the increment when it's true, we need to use the inverse. In // this case, we want to increment when carry is set. - auto CsetMI = MIRBuilder - .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, - {Register(AArch64::WZR), Register(AArch64::WZR)}) - .addImm(getInvertedCondCode(AArch64CC::HS)); + auto CsetMI = + MIRBuilder + .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, + {Register(AArch64::WZR), Register(AArch64::WZR)}) + .addImm(getInvertedCondCode(AArch64CC::HS)); constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); I.eraseFromParent(); return true; @@ -2775,14 +2781,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, - {DefReg}, {SrcReg}) - .addImm(0) - .addImm(SrcSize - 1); + {DefReg}, {SrcReg}) + .addImm(0) + .addImm(SrcSize - 1); } else if (DstSize <= 32) { ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, - {DefReg}, {SrcReg}) - .addImm(0) - .addImm(SrcSize - 1); + {DefReg}, {SrcReg}) + .addImm(0) + .addImm(SrcSize - 1); } else { return false; } @@ -2999,7 +3005,7 @@ bool AArch64InstructionSelector::selectJumpTable( // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. MachineIRBuilder MIB(I); auto MovMI = - MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) + MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) .addJumpTableIndex(JTI, AArch64II::MO_PAGE) .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); I.eraseFromParent(); @@ -3226,62 +3232,54 @@ bool AArch64InstructionSelector::selectVectorICmp( // tablegen selector. static const unsigned OpcTable[4][4][9] = { - { - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, - AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, - AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, - {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, - AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, - AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} - }, - { - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, - AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, - AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, - {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, - AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, - AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */} - }, - { - {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, - AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, - AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, - {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, - AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, - AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */} - }, - { - {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, - AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, - AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */} - }, + {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, + AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, + AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, + {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, + AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, + AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}}, + {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, + AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, + AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, + {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, + AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, + AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}}, + {{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, + AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, + AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, + {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, + AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, + AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}}, + {{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, + AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, + AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}}, }; unsigned EltIdx = Log2_32(SrcEltSize / 8); unsigned NumEltsIdx = Log2_32(NumElts / 2); @@ -3400,11 +3398,11 @@ bool AArch64InstructionSelector::selectMergeValues( Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); // Need to anyext the second scalar before we can use bfm MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(TargetOpcode::SUBREG_TO_REG)) - .addDef(SubToRegDef2) - .addImm(0) - .addUse(I.getOperand(2).getReg()) - .addImm(AArch64::sub_32); + TII.get(TargetOpcode::SUBREG_TO_REG)) + .addDef(SubToRegDef2) + .addImm(0) + .addUse(I.getOperand(2).getReg()) + .addImm(AArch64::sub_32); MachineInstr &BFM = *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) .addDef(I.getOperand(0).getReg()) @@ -3732,10 +3730,11 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( AArch64II::MO_PAGEOFF | AArch64II::MO_NC); break; case 8: - LoadMI = &*MIRBuilder - .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) - .addConstantPoolIndex( - CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + LoadMI = + &*MIRBuilder + .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) + .addConstantPoolIndex(CPIdx, 0, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); break; default: LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " @@ -4084,16 +4083,15 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant( return &I; } -MachineInstr * -AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, - MachineIRBuilder &MIRBuilder) const { +MachineInstr *AArch64InstructionSelector::emitCSetForICMP( + Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const { // CSINC increments the result when the predicate is false. Invert it. const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); - auto I = - MIRBuilder - .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)}) - .addImm(InvCC); + auto I = MIRBuilder + .buildInstr(AArch64::CSINCWr, {DefReg}, + {Register(AArch64::WZR), Register(AArch64::WZR)}) + .addImm(InvCC); constrainSelectedInstRegOperands(*I, TII, TRI, RBI); return &*I; } @@ -4291,8 +4289,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( if (!ValAndVReg || ValAndVReg->Value != 0) return nullptr; - return emitTST(LHSDef->getOperand(1), - LHSDef->getOperand(2), MIRBuilder); + return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder); } return nullptr; @@ -4971,7 +4968,9 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); } else { - MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); + MIRBuilder + .buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) + .addImm(1); MIRBuilder.buildInstr(AArch64::XPACLRI); MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); } @@ -5129,9 +5128,11 @@ static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { } InstructionSelector::ComplexRendererFns -AArch64InstructionSelector::selectExtendedSHL( - MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, - unsigned SizeInBytes, bool WantsExt) const { +AArch64InstructionSelector::selectExtendedSHL(MachineOperand &Root, + MachineOperand &Base, + MachineOperand &Offset, + unsigned SizeInBytes, + bool WantsExt) const { assert(Base.isReg() && "Expected base to be a register operand"); assert(Offset.isReg() && "Expected offset to be a register operand"); @@ -5270,8 +5271,8 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( /// /// Where x2 is the base register, and x3 is an offset register. /// -/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, -/// this will do so. Otherwise, it will return None. +/// When possible (or profitable) to fold a G_PTR_ADD into the address +/// calculation, this will do so. Otherwise, it will return None. InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeRegisterOffset( MachineOperand &Root) const { @@ -5337,8 +5338,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, // Skip immediates that can be selected in the load/store addresing // mode. - if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && - ImmOff < (0x1000 << Scale)) + if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) return None; // Helper lambda to decide whether or not it is preferable to emit an add. @@ -5488,9 +5488,8 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, } InstructionSelector::ComplexRendererFns -AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, - unsigned Size, - MachineRegisterInfo &MRI) const { +AArch64InstructionSelector::tryFoldAddLowIntoImm( + MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const { if (RootDef.getOpcode() != AArch64::G_ADD_LOW) return None; MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); @@ -5498,7 +5497,8 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, return None; // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. - // TODO: Need to check GV's offset % size if doing offset folding into globals. + // TODO: Need to check GV's offset % size if doing offset folding into + // globals. assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"); auto GV = Adrp.getOperand(1).getGlobal(); if (GV->isThreadLocal()) @@ -5755,11 +5755,10 @@ Register AArch64InstructionSelector::widenGPRBankRegIfNeeded( assert(SubReg && "Couldn't determine subregister?"); // Build the SUBREG_TO_REG and return the new, widened register. - auto SubRegToReg = - MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {}) - .addImm(0) - .addUse(Reg) - .addImm(SubReg); + auto SubRegToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {}) + .addImm(0) + .addUse(Reg) + .addImm(SubReg); constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI); return SubRegToReg.getReg(0); } @@ -5843,8 +5842,9 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, MIB.addImm(CstVal.getValue()); } -void AArch64InstructionSelector::renderLogicalImm32( - MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { +void AArch64InstructionSelector::renderLogicalImm32(MachineInstrBuilder &MIB, + const MachineInstr &I, + int OpIdx) const { assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); @@ -5852,8 +5852,9 @@ void AArch64InstructionSelector::renderLogicalImm32( MIB.addImm(Enc); } -void AArch64InstructionSelector::renderLogicalImm64( - MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { +void AArch64InstructionSelector::renderLogicalImm64(MachineInstrBuilder &MIB, + const MachineInstr &I, + int OpIdx) const { assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); @@ -5890,7 +5891,6 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { } } - // Perform fixups on the given PHI instruction's operands to force them all // to be the same as the destination regbank. static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, @@ -5986,4 +5986,4 @@ createAArch64InstructionSelector(const AArch64TargetMachine &TM, AArch64RegisterBankInfo &RBI) { return new AArch64InstructionSelector(TM, Subtarget, RBI); } -} +} // namespace llvm diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index 8a28012766ef0..071af05b04247 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -431,3 +431,46 @@ body: | $q0 = COPY %3(<4 x s32>) $q1 = COPY %4(<4 x s32>) ... +--- +name: test_shl_v8s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_shl_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[SHL:%[0-9]+]]:_(<8 x s8>) = G_SHL [[COPY]], [[COPY1]](<8 x s8>) + ; CHECK: $d0 = COPY [[SHL]](<8 x s8>) + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHL %0, %1 + $d0 = COPY %2 +... +--- +name: test_ashr_v8s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_ashr_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[ASHR:%[0-9]+]]:_(<8 x s8>) = G_ASHR [[COPY]], [[COPY1]](<8 x s8>) + ; CHECK: $d0 = COPY [[ASHR]](<8 x s8>) + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_ASHR %0, %1 + $d0 = COPY %2 +... + +--- +name: test_lshr_v8s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_lshr_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[LSHR:%[0-9]+]]:_(<8 x s8>) = G_LSHR [[COPY]], [[COPY1]](<8 x s8>) + ; CHECK: $d0 = COPY [[LSHR]](<8 x s8>) + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_LSHR %0, %1 + $d0 = COPY %2 +... From 9a2b3bbc59d57c4cf3a3b898cbfa805c4cc9263f Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 14:15:57 -0700 Subject: [PATCH 321/544] Revert "[AArch64][GlobalISel] Make <8 x s8> shifts legal." Accidentally pushed this. --- .../GISel/AArch64InstructionSelector.cpp | 272 +++++++++--------- .../AArch64/GlobalISel/legalize-shift.mir | 43 --- 2 files changed, 136 insertions(+), 179 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 22e21b4bf0827..1daa2b29b9d54 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -34,8 +34,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/Type.h" +#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -101,7 +101,8 @@ class AArch64InstructionSelector : public InstructionSelector { bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool tryOptAndIntoCompareBranch(MachineInstr *LHS, int64_t CmpConstant, + bool tryOptAndIntoCompareBranch(MachineInstr *LHS, + int64_t CmpConstant, const CmpInst::Predicate &Pred, MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; @@ -312,11 +313,10 @@ class AArch64InstructionSelector : public InstructionSelector { /// Returns a \p ComplexRendererFns which contains a base, offset, and whether /// or not a shift + extend should be folded into an addressing mode. Returns /// None when this is not profitable or possible. - ComplexRendererFns selectExtendedSHL(MachineOperand &Root, - MachineOperand &Base, - MachineOperand &Offset, - unsigned SizeInBytes, - bool WantsExt) const; + ComplexRendererFns + selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, + MachineOperand &Offset, unsigned SizeInBytes, + bool WantsExt) const; ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, unsigned SizeInBytes) const; @@ -360,7 +360,7 @@ class AArch64InstructionSelector : public InstructionSelector { /// subregister copy if necessary. Return either ExtReg, or the result of the /// new copy. Register narrowExtendRegIfNeeded(Register ExtReg, - MachineIRBuilder &MIB) const; + MachineIRBuilder &MIB) const; Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size, MachineIRBuilder &MIB) const; ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; @@ -1423,8 +1423,7 @@ static Optional getVectorShiftImm(Register Reg, // Check all operands are identical immediates. int64_t ImmVal = 0; for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) { - auto VRegAndVal = - getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); + auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); if (!VRegAndVal) return None; @@ -1439,8 +1438,7 @@ static Optional getVectorShiftImm(Register Reg, /// Matches and returns the shift immediate value for a SHL instruction given /// a shift operand. -static Optional getVectorSHLImm(LLT SrcTy, Register Reg, - MachineRegisterInfo &MRI) { +static Optional getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { Optional ShiftImm = getVectorShiftImm(Reg, MRI); if (!ShiftImm) return None; @@ -1500,8 +1498,6 @@ bool AArch64InstructionSelector::selectVectorSHL( Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; } else if (Ty == LLT::vector(16, 8)) { Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); return false; @@ -1561,9 +1557,6 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( } else if (Ty == LLT::vector(16, 8)) { Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; NegOpc = AArch64::NEGv8i16; - } else if (Ty == LLT::vector(8, 8)) { - Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; - NegOpc = AArch64::NEGv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); return false; @@ -1610,9 +1603,8 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } -void AArch64InstructionSelector::materializeLargeCMVal(MachineInstr &I, - const Value *V, - unsigned OpFlags) const { +void AArch64InstructionSelector::materializeLargeCMVal( + MachineInstr &I, const Value *V, unsigned OpFlags) const { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1643,8 +1635,8 @@ void AArch64InstructionSelector::materializeLargeCMVal(MachineInstr &I, constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); return DstReg; }; - Register DstReg = - BuildMovK(MovZ.getReg(0), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); + Register DstReg = BuildMovK(MovZ.getReg(0), + AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); return; @@ -1715,8 +1707,8 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { /// because the selector works bottom up, uses before defs. By the time we /// end up trying to select a G_PTR_ADD, we should have already attempted to /// fold this into addressing modes and were therefore unsuccessful. -bool AArch64InstructionSelector::convertPtrAddToAdd(MachineInstr &I, - MachineRegisterInfo &MRI) { +bool AArch64InstructionSelector::convertPtrAddToAdd( + MachineInstr &I, MachineRegisterInfo &MRI) { assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); Register DstReg = I.getOperand(0).getReg(); Register AddOp1Reg = I.getOperand(1).getReg(); @@ -1894,7 +1886,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { // Certain non-generic instructions also need some special handling. - if (Opcode == TargetOpcode::LOAD_STACK_GUARD) + if (Opcode == TargetOpcode::LOAD_STACK_GUARD) return constrainSelectedInstRegOperands(I, TII, TRI, RBI); if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { @@ -1902,10 +1894,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const LLT DefTy = MRI.getType(DefReg); const RegClassOrRegBank &RegClassOrBank = - MRI.getRegClassOrRegBank(DefReg); + MRI.getRegClassOrRegBank(DefReg); - const TargetRegisterClass *DefRC = - RegClassOrBank.dyn_cast(); + const TargetRegisterClass *DefRC + = RegClassOrBank.dyn_cast(); if (!DefRC) { if (!DefTy.isValid()) { LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); @@ -1930,6 +1922,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return true; } + if (I.getNumOperands() != I.getNumExplicitOperands()) { LLVM_DEBUG( dbgs() << "Generic instruction has unexpected implicit operands\n"); @@ -2005,9 +1998,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { .addUse(CondReg) .addImm(1); constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI); - auto Bcc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) - .addImm(AArch64CC::EQ) - .addMBB(DestMBB); + auto Bcc = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) + .addImm(AArch64CC::EQ) + .addMBB(DestMBB); I.eraseFromParent(); return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI); @@ -2381,7 +2375,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (NewOpc == I.getOpcode()) return nullptr; // Check if we can fold anything into the addressing mode. - auto AddrModeFns = selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); + auto AddrModeFns = + selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); if (!AddrModeFns) { // Can't fold anything. Use the original instruction. I.setDesc(TII.get(NewOpc)); @@ -2541,11 +2536,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // to the G_UADDO. CSINC increments the result when the predicate is false, // so to get the increment when it's true, we need to use the inverse. In // this case, we want to increment when carry is set. - auto CsetMI = - MIRBuilder - .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, - {Register(AArch64::WZR), Register(AArch64::WZR)}) - .addImm(getInvertedCondCode(AArch64CC::HS)); + auto CsetMI = MIRBuilder + .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, + {Register(AArch64::WZR), Register(AArch64::WZR)}) + .addImm(getInvertedCondCode(AArch64CC::HS)); constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); I.eraseFromParent(); return true; @@ -2781,14 +2775,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, - {DefReg}, {SrcReg}) - .addImm(0) - .addImm(SrcSize - 1); + {DefReg}, {SrcReg}) + .addImm(0) + .addImm(SrcSize - 1); } else if (DstSize <= 32) { ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, - {DefReg}, {SrcReg}) - .addImm(0) - .addImm(SrcSize - 1); + {DefReg}, {SrcReg}) + .addImm(0) + .addImm(SrcSize - 1); } else { return false; } @@ -3005,7 +2999,7 @@ bool AArch64InstructionSelector::selectJumpTable( // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. MachineIRBuilder MIB(I); auto MovMI = - MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) + MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) .addJumpTableIndex(JTI, AArch64II::MO_PAGE) .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); I.eraseFromParent(); @@ -3232,54 +3226,62 @@ bool AArch64InstructionSelector::selectVectorICmp( // tablegen selector. static const unsigned OpcTable[4][4][9] = { - {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, - AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, - AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, - {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, - AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, - AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}}, - {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, - AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, - AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, - {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, - AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, - AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}}, - {{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, - AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, - AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, - {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, - AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, - AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}}, - {{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, - AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, - AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}, - {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, - 0 /* invalid */}}, + { + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, + AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, + AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, + {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, + AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, + AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} + }, + { + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, + AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, + AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, + {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, + AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, + AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */} + }, + { + {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, + AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, + AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, + {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, + AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, + AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */} + }, + { + {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, + AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, + AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */}, + {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, + 0 /* invalid */} + }, }; unsigned EltIdx = Log2_32(SrcEltSize / 8); unsigned NumEltsIdx = Log2_32(NumElts / 2); @@ -3398,11 +3400,11 @@ bool AArch64InstructionSelector::selectMergeValues( Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); // Need to anyext the second scalar before we can use bfm MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(TargetOpcode::SUBREG_TO_REG)) - .addDef(SubToRegDef2) - .addImm(0) - .addUse(I.getOperand(2).getReg()) - .addImm(AArch64::sub_32); + TII.get(TargetOpcode::SUBREG_TO_REG)) + .addDef(SubToRegDef2) + .addImm(0) + .addUse(I.getOperand(2).getReg()) + .addImm(AArch64::sub_32); MachineInstr &BFM = *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) .addDef(I.getOperand(0).getReg()) @@ -3730,11 +3732,10 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( AArch64II::MO_PAGEOFF | AArch64II::MO_NC); break; case 8: - LoadMI = - &*MIRBuilder - .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) - .addConstantPoolIndex(CPIdx, 0, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + LoadMI = &*MIRBuilder + .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) + .addConstantPoolIndex( + CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); break; default: LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " @@ -4083,15 +4084,16 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant( return &I; } -MachineInstr *AArch64InstructionSelector::emitCSetForICMP( - Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const { +MachineInstr * +AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, + MachineIRBuilder &MIRBuilder) const { // CSINC increments the result when the predicate is false. Invert it. const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); - auto I = MIRBuilder - .buildInstr(AArch64::CSINCWr, {DefReg}, - {Register(AArch64::WZR), Register(AArch64::WZR)}) - .addImm(InvCC); + auto I = + MIRBuilder + .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)}) + .addImm(InvCC); constrainSelectedInstRegOperands(*I, TII, TRI, RBI); return &*I; } @@ -4289,7 +4291,8 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( if (!ValAndVReg || ValAndVReg->Value != 0) return nullptr; - return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder); + return emitTST(LHSDef->getOperand(1), + LHSDef->getOperand(2), MIRBuilder); } return nullptr; @@ -4968,9 +4971,7 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); } else { - MIRBuilder - .buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) - .addImm(1); + MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); MIRBuilder.buildInstr(AArch64::XPACLRI); MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); } @@ -5128,11 +5129,9 @@ static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { } InstructionSelector::ComplexRendererFns -AArch64InstructionSelector::selectExtendedSHL(MachineOperand &Root, - MachineOperand &Base, - MachineOperand &Offset, - unsigned SizeInBytes, - bool WantsExt) const { +AArch64InstructionSelector::selectExtendedSHL( + MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, + unsigned SizeInBytes, bool WantsExt) const { assert(Base.isReg() && "Expected base to be a register operand"); assert(Offset.isReg() && "Expected offset to be a register operand"); @@ -5271,8 +5270,8 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( /// /// Where x2 is the base register, and x3 is an offset register. /// -/// When possible (or profitable) to fold a G_PTR_ADD into the address -/// calculation, this will do so. Otherwise, it will return None. +/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, +/// this will do so. Otherwise, it will return None. InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeRegisterOffset( MachineOperand &Root) const { @@ -5338,7 +5337,8 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, // Skip immediates that can be selected in the load/store addresing // mode. - if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) + if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && + ImmOff < (0x1000 << Scale)) return None; // Helper lambda to decide whether or not it is preferable to emit an add. @@ -5488,8 +5488,9 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, } InstructionSelector::ComplexRendererFns -AArch64InstructionSelector::tryFoldAddLowIntoImm( - MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const { +AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, + unsigned Size, + MachineRegisterInfo &MRI) const { if (RootDef.getOpcode() != AArch64::G_ADD_LOW) return None; MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); @@ -5497,8 +5498,7 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm( return None; // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. - // TODO: Need to check GV's offset % size if doing offset folding into - // globals. + // TODO: Need to check GV's offset % size if doing offset folding into globals. assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"); auto GV = Adrp.getOperand(1).getGlobal(); if (GV->isThreadLocal()) @@ -5755,10 +5755,11 @@ Register AArch64InstructionSelector::widenGPRBankRegIfNeeded( assert(SubReg && "Couldn't determine subregister?"); // Build the SUBREG_TO_REG and return the new, widened register. - auto SubRegToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {}) - .addImm(0) - .addUse(Reg) - .addImm(SubReg); + auto SubRegToReg = + MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {}) + .addImm(0) + .addUse(Reg) + .addImm(SubReg); constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI); return SubRegToReg.getReg(0); } @@ -5842,9 +5843,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, MIB.addImm(CstVal.getValue()); } -void AArch64InstructionSelector::renderLogicalImm32(MachineInstrBuilder &MIB, - const MachineInstr &I, - int OpIdx) const { +void AArch64InstructionSelector::renderLogicalImm32( + MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); @@ -5852,9 +5852,8 @@ void AArch64InstructionSelector::renderLogicalImm32(MachineInstrBuilder &MIB, MIB.addImm(Enc); } -void AArch64InstructionSelector::renderLogicalImm64(MachineInstrBuilder &MIB, - const MachineInstr &I, - int OpIdx) const { +void AArch64InstructionSelector::renderLogicalImm64( + MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); @@ -5891,6 +5890,7 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { } } + // Perform fixups on the given PHI instruction's operands to force them all // to be the same as the destination regbank. static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, @@ -5986,4 +5986,4 @@ createAArch64InstructionSelector(const AArch64TargetMachine &TM, AArch64RegisterBankInfo &RBI) { return new AArch64InstructionSelector(TM, Subtarget, RBI); } -} // namespace llvm +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index 071af05b04247..8a28012766ef0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -431,46 +431,3 @@ body: | $q0 = COPY %3(<4 x s32>) $q1 = COPY %4(<4 x s32>) ... ---- -name: test_shl_v8s8 -body: | - bb.0: - ; CHECK-LABEL: name: test_shl_v8s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 - ; CHECK: [[SHL:%[0-9]+]]:_(<8 x s8>) = G_SHL [[COPY]], [[COPY1]](<8 x s8>) - ; CHECK: $d0 = COPY [[SHL]](<8 x s8>) - %0:_(<8 x s8>) = COPY $d0 - %1:_(<8 x s8>) = COPY $d1 - %2:_(<8 x s8>) = G_SHL %0, %1 - $d0 = COPY %2 -... ---- -name: test_ashr_v8s8 -body: | - bb.0: - ; CHECK-LABEL: name: test_ashr_v8s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 - ; CHECK: [[ASHR:%[0-9]+]]:_(<8 x s8>) = G_ASHR [[COPY]], [[COPY1]](<8 x s8>) - ; CHECK: $d0 = COPY [[ASHR]](<8 x s8>) - %0:_(<8 x s8>) = COPY $d0 - %1:_(<8 x s8>) = COPY $d1 - %2:_(<8 x s8>) = G_ASHR %0, %1 - $d0 = COPY %2 -... - ---- -name: test_lshr_v8s8 -body: | - bb.0: - ; CHECK-LABEL: name: test_lshr_v8s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 - ; CHECK: [[LSHR:%[0-9]+]]:_(<8 x s8>) = G_LSHR [[COPY]], [[COPY1]](<8 x s8>) - ; CHECK: $d0 = COPY [[LSHR]](<8 x s8>) - %0:_(<8 x s8>) = COPY $d0 - %1:_(<8 x s8>) = COPY $d1 - %2:_(<8 x s8>) = G_LSHR %0, %1 - $d0 = COPY %2 -... From a97e97faedab0ba57f7c471f778d38cfd18988b8 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 14:18:38 -0700 Subject: [PATCH 322/544] [AArch64][GlobalISel] Make <8 x s8> shifts legal and add selection support. --- .../GISel/AArch64InstructionSelector.cpp | 5 +++ .../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +- .../AArch64/GlobalISel/legalize-shift.mir | 45 ++++++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 1daa2b29b9d54..bb132a33ac5bc 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1498,6 +1498,8 @@ bool AArch64InstructionSelector::selectVectorSHL( Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; } else if (Ty == LLT::vector(16, 8)) { Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); return false; @@ -1557,6 +1559,9 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( } else if (Ty == LLT::vector(16, 8)) { Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; NegOpc = AArch64::NEGv8i16; + } else if (Ty == LLT::vector(8, 8)) { + Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; + NegOpc = AArch64::NEGv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); return false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 31dde5d76ac79..8e38880034c34 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -124,13 +124,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {s32, s32}, {s32, s64}, {s64, s64}, + {v8s8, v8s8}, {v16s8, v16s8}, {v4s16, v4s16}, {v8s16, v8s16}, {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}, - }) .clampScalar(1, s32, s64) .clampScalar(0, s32, s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index 8a28012766ef0..bf0a95776d5de 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -march=aarch64 -run-pass=legalizer %s -o - | FileCheck %s -# R UN: llc -O0 -debugify-and-strip-all-safe -march=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK +# RUN: llc -O0 -debugify-and-strip-all-safe -march=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --- name: test_shift body: | @@ -431,3 +431,46 @@ body: | $q0 = COPY %3(<4 x s32>) $q1 = COPY %4(<4 x s32>) ... +--- +name: test_shl_v8s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_shl_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[SHL:%[0-9]+]]:_(<8 x s8>) = G_SHL [[COPY]], [[COPY1]](<8 x s8>) + ; CHECK: $d0 = COPY [[SHL]](<8 x s8>) + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHL %0, %1 + $d0 = COPY %2 +... +--- +name: test_ashr_v8s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_ashr_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[ASHR:%[0-9]+]]:_(<8 x s8>) = G_ASHR [[COPY]], [[COPY1]](<8 x s8>) + ; CHECK: $d0 = COPY [[ASHR]](<8 x s8>) + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_ASHR %0, %1 + $d0 = COPY %2 +... + +--- +name: test_lshr_v8s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_lshr_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[LSHR:%[0-9]+]]:_(<8 x s8>) = G_LSHR [[COPY]], [[COPY1]](<8 x s8>) + ; CHECK: $d0 = COPY [[LSHR]](<8 x s8>) + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_LSHR %0, %1 + $d0 = COPY %2 +... From e28c5899a24117cdb0b081a54508af486a2634a0 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 14:28:23 -0700 Subject: [PATCH 323/544] [AArch64][GlobalISel] Make <8 x s8> integer arithmetic ops legal. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +- .../AArch64/GlobalISel/legalize-add.mir | 20 +++++++++++++++++++ llvm/test/CodeGen/AArch64/arm64-vabs.ll | 8 ++++++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 8e38880034c34..182727cd062f5 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -100,7 +100,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) - .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8}) + .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) .scalarizeIf( [=](const LegalityQuery &Query) { return Query.Opcode == G_MUL && Query.Types[0] == v2s64; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir index 5e0755836ce43..ab8510bf9d92b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir @@ -194,3 +194,23 @@ body: | $d0 = COPY %2(<4 x s16>) RET_ReallyLR implicit $d0 ... +--- +name: add_v8s8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: add_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[ADD:%[0-9]+]]:_(<8 x s8>) = G_ADD [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ADD]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_ADD %0, %1 + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index 636522901ba4c..419cafc23186a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -982,8 +982,12 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind { define <8 x i8> @abspattern3(<8 x i8> %a) nounwind { ; CHECK-LABEL: abspattern3: -; CHECK: abs.8b -; CHECK-NEXT: ret +; DAG: abs.8b +; DAG-NEXT: ret + +; GISEL-DAG: neg.8b +; GISEL-DAG: cmgt.8b +; GISEL: bit.8b %tmp1neg = sub <8 x i8> zeroinitializer, %a %b = icmp slt <8 x i8> %a, zeroinitializer %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a From 017b871502b0c6fe72f52c5b47780f77e38d9035 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 1 Oct 2020 14:47:58 -0700 Subject: [PATCH 324/544] [AArch64][GlobalISel] Alias rules for G_FCMP to G_ICMP. No need to be different here for the vast majority of rules. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 10 +- ...ector-icmp.mir => legalize-vector-cmp.mir} | 106 ++++++++++++++++++ .../GlobalISel/legalizer-info-validation.mir | 5 +- 3 files changed, 111 insertions(+), 10 deletions(-) rename llvm/test/CodeGen/AArch64/GlobalISel/{legalize-vector-icmp.mir => legalize-vector-cmp.mir} (93%) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 182727cd062f5..4da8406f3569d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -326,8 +326,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({s32, s64}) .clampScalar(0, s32, s64); - - getActionDefinitionsBuilder(G_ICMP) + + getActionDefinitionsBuilder({G_ICMP, G_FCMP}) .legalFor({{s32, s32}, {s32, s64}, {s32, p0}, @@ -358,12 +358,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarOrEltToNextPow2(1) .clampNumElements(0, v2s32, v4s32); - getActionDefinitionsBuilder(G_FCMP) - .legalFor({{s32, s32}, {s32, s64}}) - .clampScalar(0, s32, s32) - .clampScalar(1, s32, s64) - .widenScalarToNextPow2(1); - // Extensions auto ExtLegalFunc = [=](const LegalityQuery &Query) { unsigned DstSize = Query.Types[0].getSizeInBits(); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-cmp.mir similarity index 93% rename from llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir rename to llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-cmp.mir index ec0446d0236fb..3ec41a3a9358c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-cmp.mir @@ -1996,3 +1996,109 @@ body: | $d0 = COPY %7(<8 x s8>) RET_ReallyLR implicit $d0 ... +--- +name: fcmp_8xs1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } + - { reg: '$q2' } + - { reg: '$q3' } +body: | + bb.1: + liveins: $q0, $q1, $q2, $q3 + + ; CHECK-LABEL: name: fcmp_8xs1 + ; CHECK: liveins: $q0, $q1, $q2, $q3 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(one), [[COPY]](<4 x s32>), [[COPY2]] + ; CHECK: [[FCMP1:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(one), [[COPY1]](<4 x s32>), [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %2:_(<4 x s32>) = COPY $q0 + %3:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>) + %4:_(<4 x s32>) = COPY $q2 + %5:_(<4 x s32>) = COPY $q3 + %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>) + %6:_(<8 x s1>) = G_FCMP floatpred(one), %0(<8 x s32>), %1 + %7:_(<8 x s8>) = G_ANYEXT %6(<8 x s1>) + $d0 = COPY %7(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: fcmp_8xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } + - { reg: '$q2' } + - { reg: '$q3' } +body: | + bb.1: + liveins: $q0, $q1, $q2, $q3 + + ; CHECK-LABEL: name: fcmp_8xs32 + ; CHECK: liveins: $q0, $q1, $q2, $q3 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(oeq), [[COPY]](<4 x s32>), [[COPY2]] + ; CHECK: [[FCMP1:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(oeq), [[COPY1]](<4 x s32>), [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %2:_(<4 x s32>) = COPY $q0 + %3:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>) + %4:_(<4 x s32>) = COPY $q2 + %5:_(<4 x s32>) = COPY $q3 + %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>) + %6:_(<8 x s32>) = G_FCMP floatpred(oeq), %0(<8 x s32>), %1 + %7:_(<8 x s8>) = G_TRUNC %6(<8 x s32>) + $d0 = COPY %7(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: fcmp_v4s32 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +machineFunctionInfo: {} +body: | + bb.1: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: fcmp_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(olt), [[COPY]](<4 x s32>), [[COPY1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>) + ; CHECK: $d0 = COPY [[TRUNC]](<4 x s16>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s1>) = G_FCMP floatpred(olt), %0(<4 x s32>), %1 + %3:_(<4 x s16>) = G_ANYEXT %2(<4 x s1>) + $d0 = COPY %3(<4 x s16>) + RET_ReallyLR implicit $d0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 357eb8b981c50..4d49365a8dabb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -286,8 +286,9 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected From 8e8664e55e8986e061283cb20c30f21fb2d2b641 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Thu, 1 Oct 2020 13:46:15 -0700 Subject: [PATCH 325/544] [AArch64][GlobalISel] Use emitTestBit in selection for G_BRCOND Partially refactoring, partially fixing a bug. - We shouldn't use TB(N)ZX unless the bit number is >= 32 - We can fold more than xor using emitTestBit Also remove a check which isn't relevant anymore + update tests. Rename select-brcond-of-not.mir to select-brcond-of-binop.mir, since it now tests more than just G_XOR. Differential Revision: https://reviews.llvm.org/D88702 --- .../GISel/AArch64InstructionSelector.cpp | 30 +-- .../GlobalISel/select-brcond-of-binop.mir | 235 ++++++++++++++++++ .../GlobalISel/select-brcond-of-not.mir | 76 ------ .../CodeGen/AArch64/GlobalISel/select.mir | 3 +- .../GlobalISel/widen-narrow-tbz-tbnz.mir | 7 +- 5 files changed, 244 insertions(+), 107 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-not.mir diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index bb132a33ac5bc..db6e88b01599a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1959,15 +1959,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { switch (Opcode) { case TargetOpcode::G_BRCOND: { - if (Ty.getSizeInBits() > 32) { - // We shouldn't need this on AArch64, but it would be implemented as an - // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the - // bit being tested is < 32. - LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty - << ", expected at most 32-bits"); - return false; - } - Register CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); @@ -1978,25 +1969,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return true; if (ProduceNonFlagSettingCondBr) { - unsigned BOpc = AArch64::TBNZW; - // Try to fold a not, i.e. a xor, cond, 1. - Register XorSrc; - int64_t Cst; - if (mi_match(CondReg, MRI, - m_GTrunc(m_GXor(m_Reg(XorSrc), m_ICst(Cst)))) && - Cst == 1) { - CondReg = XorSrc; - BOpc = AArch64::TBZW; - if (MRI.getType(XorSrc).getSizeInBits() > 32) - BOpc = AArch64::TBZX; - } - auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(BOpc)) - .addUse(CondReg) - .addImm(/*bit offset=*/0) - .addMBB(DestMBB); - + auto TestBit = emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, + DestMBB, MIB); I.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); + return constrainSelectedInstRegOperands(*TestBit, TII, TRI, RBI); } else { auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) .addDef(AArch64::WZR) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir new file mode 100644 index 0000000000000..9d480b8e96e02 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir @@ -0,0 +1,235 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- +name: condbr_of_not +legalized: true +regBankSelected: true +liveins: + - { reg: '$x0' } +body: | + ; CHECK-LABEL: name: condbr_of_not + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: TBZW [[LDRBBui]], 0, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + successors: %bb.2, %bb.3 + liveins: $x0 + + %0:gpr(p0) = COPY $x0 + %8:gpr(s8) = G_LOAD %0(p0) :: (load 1) + %4:gpr(s32) = G_ANYEXT %8(s8) + %5:gpr(s32) = G_CONSTANT i32 1 + %6:gpr(s32) = G_XOR %4, %5 + %3:gpr(s1) = G_TRUNC %6(s32) + G_BRCOND %3(s1), %bb.3 + + bb.2: + RET_ReallyLR + + bb.3: + RET_ReallyLR + +... +--- +name: condbr_of_not_64 +legalized: true +regBankSelected: true +liveins: + - { reg: '$x0' } +body: | + ; CHECK-LABEL: name: condbr_of_not_64 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: TBZW [[LDRBBui]], 0, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + ; TB(N)ZX has no encoding if the bit being tested is < 32, so we should get + ; TBZW here. + ; + bb.1: + successors: %bb.2, %bb.3 + liveins: $x0 + + %0:gpr(p0) = COPY $x0 + %8:gpr(s8) = G_LOAD %0(p0) :: (load 1) + %4:gpr(s64) = G_ANYEXT %8(s8) + %5:gpr(s64) = G_CONSTANT i64 1 + %6:gpr(s64) = G_XOR %4, %5 + %3:gpr(s1) = G_TRUNC %6(s64) + G_BRCOND %3(s1), %bb.3 + + bb.2: + RET_ReallyLR + + bb.3: + RET_ReallyLR + +... +--- +name: condbr_of_and +legalized: true +regBankSelected: true +body: | + ; CHECK-LABEL: name: condbr_of_and + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: %lhs:gpr32 = COPY $w0 + ; CHECK: TBNZW %lhs, 0, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + successors: %bb.2, %bb.3 + liveins: $w0 + %lhs:gpr(s32) = COPY $w0 + %rhs:gpr(s32) = G_CONSTANT i32 1 + %op:gpr(s32) = G_AND %lhs, %rhs + %trunc:gpr(s1) = G_TRUNC %op(s32) + G_BRCOND %trunc(s1), %bb.3 + + bb.2: + RET_ReallyLR + + bb.3: + RET_ReallyLR + +... +--- +name: condbr_of_and_no_cst +legalized: true +regBankSelected: true +body: | + ; CHECK-LABEL: name: condbr_of_and_no_cst + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: %lhs:gpr32 = COPY $w0 + ; CHECK: %rhs:gpr32 = COPY $w1 + ; CHECK: %op:gpr32 = ANDWrr %lhs, %rhs + ; CHECK: TBNZW %op, 0, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + successors: %bb.2, %bb.3 + liveins: $w0, $w1 + %lhs:gpr(s32) = COPY $w0 + %rhs:gpr(s32) = COPY $w1 + %op:gpr(s32) = G_AND %lhs, %rhs + %trunc:gpr(s1) = G_TRUNC %op(s32) + G_BRCOND %trunc(s1), %bb.3 + + bb.2: + RET_ReallyLR + + bb.3: + RET_ReallyLR + +... +--- +name: condbr_of_shl +legalized: true +regBankSelected: true +body: | + ; CHECK-LABEL: name: condbr_of_shl + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: %lhs:gpr32 = COPY $w0 + ; CHECK: %op:gpr32 = UBFMWri %lhs, 31, 30 + ; CHECK: TBNZW %op, 0, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + ; We won't ever fold this, because + ; bit = 0 + ; bit - constant < 0, which isn't valid for tbz/tbnz. + ; + bb.1: + successors: %bb.2, %bb.3 + liveins: $w0 + %lhs:gpr(s32) = COPY $w0 + %rhs:gpr(s32) = G_CONSTANT i32 1 + %op:gpr(s32) = G_SHL %lhs, %rhs + %trunc:gpr(s1) = G_TRUNC %op(s32) + G_BRCOND %trunc(s1), %bb.3 + + bb.2: + RET_ReallyLR + + bb.3: + RET_ReallyLR + +... +--- +name: condbr_of_ashr +legalized: true +regBankSelected: true +body: | + ; CHECK-LABEL: name: condbr_of_ashr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: %lhs:gpr32 = COPY $w0 + ; CHECK: TBNZW %lhs, 1, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + ; We can fold ashr, because we can have + ; + ; (tbz (ashr x, c), 0) where 0 + c > # bits in x. + ; + bb.1: + successors: %bb.2, %bb.3 + liveins: $w0 + %lhs:gpr(s32) = COPY $w0 + %rhs:gpr(s32) = G_CONSTANT i32 1 + %op:gpr(s32) = G_ASHR %lhs, %rhs + %trunc:gpr(s1) = G_TRUNC %op(s32) + G_BRCOND %trunc(s1), %bb.3 + + bb.2: + RET_ReallyLR + + bb.3: + RET_ReallyLR + +... +--- +name: tbnzx +legalized: true +regBankSelected: true +body: | + ; CHECK-LABEL: name: tbnzx + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: %lhs:gpr64 = COPY $x0 + ; CHECK: TBNZX %lhs, 63, %bb.2 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + successors: %bb.2, %bb.3 + liveins: $x0 + %lhs:gpr(s64) = COPY $x0 + %rhs:gpr(s64) = G_CONSTANT i64 8589934592 + %op:gpr(s64) = G_ASHR %lhs, %rhs + %trunc:gpr(s1) = G_TRUNC %op(s64) + G_BRCOND %trunc(s1), %bb.3 + bb.2: + RET_ReallyLR + bb.3: + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-not.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-not.mir deleted file mode 100644 index 41fe50d9bb7dc..0000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-not.mir +++ /dev/null @@ -1,76 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- -name: condbr_of_not -legalized: true -regBankSelected: true -liveins: - - { reg: '$x0' } -body: | - ; CHECK-LABEL: name: condbr_of_not - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) - ; CHECK: TBZW [[LDRBBui]], 0, %bb.2 - ; CHECK: bb.1: - ; CHECK: RET_ReallyLR - ; CHECK: bb.2: - ; CHECK: RET_ReallyLR - bb.1: - successors: %bb.2, %bb.3 - liveins: $x0 - - %0:gpr(p0) = COPY $x0 - %8:gpr(s8) = G_LOAD %0(p0) :: (load 1) - %4:gpr(s32) = G_ANYEXT %8(s8) - %5:gpr(s32) = G_CONSTANT i32 1 - %6:gpr(s32) = G_XOR %4, %5 - %3:gpr(s1) = G_TRUNC %6(s32) - G_BRCOND %3(s1), %bb.3 - - bb.2: - RET_ReallyLR - - bb.3: - RET_ReallyLR - -... ---- -name: condbr_of_not_64 -legalized: true -regBankSelected: true -liveins: - - { reg: '$x0' } -body: | - ; CHECK-LABEL: name: condbr_of_not_64 - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) - ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDRBBui]], %subreg.sub_32 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]] - ; CHECK: TBZX [[COPY1]], 0, %bb.2 - ; CHECK: bb.1: - ; CHECK: RET_ReallyLR - ; CHECK: bb.2: - ; CHECK: RET_ReallyLR - bb.1: - successors: %bb.2, %bb.3 - liveins: $x0 - - %0:gpr(p0) = COPY $x0 - %8:gpr(s8) = G_LOAD %0(p0) :: (load 1) - %4:gpr(s64) = G_ANYEXT %8(s8) - %5:gpr(s64) = G_CONSTANT i64 1 - %6:gpr(s64) = G_XOR %4, %5 - %3:gpr(s1) = G_TRUNC %6(s64) - G_BRCOND %3(s1), %bb.3 - - bb.2: - RET_ReallyLR - - bb.3: - RET_ReallyLR - -... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir index 112aee8d552ce..a2a41a8aaa311 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir @@ -223,8 +223,9 @@ tracksRegLiveness: true # CHECK: registers: # CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } # CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir index 22963c50a2ebe..0794e2a3e58a8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir @@ -174,9 +174,10 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $x0 - ; CHECK: %wide:gpr64 = COPY $x0 - ; CHECK: %trunc:gpr32 = COPY %wide.sub_32 - ; CHECK: TBNZW %trunc, 0, %bb.1 + ; CHECK: %wide:gpr64all = COPY $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %wide.sub_32 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: TBNZW [[COPY1]], 0, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR From 5402d11b1d8853ff10417b0f8d32edde3f4a51c0 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Wed, 30 Sep 2020 14:01:12 -0700 Subject: [PATCH 326/544] [GlobalISel][AArch64] Don't emit cset for G_FCMPs feeding into G_BRCONDs Similar to the FP case in `AArch64TargetLowering::LowerBR_CC`. Instead of emitting the csets + a tbnz, just emit a compare + bcc (or two bccs, depending on the condition code) This improves cases like this: https://godbolt.org/z/v8hebx This is a 0.1% geomean code size improvement for CTMark at -O3. Differential Revision: https://reviews.llvm.org/D88624 --- .../GISel/AArch64InstructionSelector.cpp | 24 +- .../AArch64/GlobalISel/fold-brcond-fcmp.mir | 555 ++++++++++++++++++ 2 files changed, 575 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/fold-brcond-fcmp.mir diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index db6e88b01599a..82eca0bbb9c48 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1322,15 +1322,31 @@ bool AArch64InstructionSelector::selectCompareBranch( MachineInstr *CCMI = MRI.getVRegDef(CondReg); if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg()); - if (CCMI->getOpcode() != TargetOpcode::G_ICMP) + + unsigned CCMIOpc = CCMI->getOpcode(); + if (CCMIOpc != TargetOpcode::G_ICMP && CCMIOpc != TargetOpcode::G_FCMP) return false; + MachineIRBuilder MIB(I); Register LHS = CCMI->getOperand(2).getReg(); Register RHS = CCMI->getOperand(3).getReg(); + auto Pred = + static_cast(CCMI->getOperand(1).getPredicate()); + + if (CCMIOpc == TargetOpcode::G_FCMP) { + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two branches to implement. + emitFPCompare(LHS, RHS, MIB); + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(Pred, CC1, CC2); + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); + if (CC2 != AArch64CC::AL) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + I.eraseFromParent(); + return true; + } + auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - MachineIRBuilder MIB(I); - CmpInst::Predicate Pred = - (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI); // When we can emit a TB(N)Z, prefer that. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-brcond-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-brcond-fcmp.mir new file mode 100644 index 0000000000000..08f478e12521b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-brcond-fcmp.mir @@ -0,0 +1,555 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that we don't have to emit a CSINC when emitting a G_FCMP being used by +# a G_BRCOND. +# +# Condition codes which require more than one instruction should have two Bccs. + +... +--- +name: oeq +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: oeq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(oeq), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ogt +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ogt + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 12, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ogt), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: oge +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: oge + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 10, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(oge), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: olt +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: olt + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 4, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(olt), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ole +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ole + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 9, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ole), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: one +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: one + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 4, %bb.2, implicit $nzcv + ; CHECK: Bcc 12, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(one), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ord +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ord + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 7, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ord), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: uno +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: uno + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 6, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(uno), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ueq +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ueq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 0, %bb.2, implicit $nzcv + ; CHECK: Bcc 6, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ueq), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ugt +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ugt + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 8, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ugt), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: uge +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: uge + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 5, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(uge), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ult +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ult + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ult), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: ule +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ule + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 13, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(ule), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 + +... +--- +name: une +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: une + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cmp_lhs:fpr32 = COPY $s0 + ; CHECK: %cmp_rhs:fpr32 = COPY $s1 + ; CHECK: FCMPSrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: Bcc 1, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: $s0 = COPY %cmp_lhs + ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK: bb.2: + ; CHECK: $s1 = COPY %cmp_rhs + ; CHECK: RET_ReallyLR implicit $s1 + bb.0: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $s0, $s1, $w0, $w1 + + %cmp_lhs:fpr(s32) = COPY $s0 + %cmp_rhs:fpr(s32) = COPY $s1 + %fcmp:gpr(s32) = G_FCMP floatpred(une), %cmp_lhs(s32), %cmp_rhs + %trunc:gpr(s1) = G_TRUNC %fcmp(s32) + G_BRCOND %trunc(s1), %bb.2 + G_BR %bb.1 + bb.1: + $s0 = COPY %cmp_lhs + RET_ReallyLR implicit $s0 + bb.2: + $s1 = COPY %cmp_rhs + RET_ReallyLR implicit $s1 From e99d184d54937b56d5f4f1ba06fb984019beaee1 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 12:26:25 -0700 Subject: [PATCH 327/544] [flang] Readability improvement in binary->decimal conversion Tweak binary->decimal conversions to avoid an integer multiplication in a hot loop to improve readability and get a minor (~5%) speed-up. Use native integer division by constants for more readability, too, since current build compilers seem to optimize it correctly now. Delete the now needless temporary work-around facility in Common/unsigned-const-division.h. Differential revision: https://reviews.llvm.org/D88604 --- .../flang/Common/unsigned-const-division.h | 77 ------------------- flang/lib/Decimal/big-radix-floating-point.h | 9 +-- flang/lib/Decimal/binary-to-decimal.cpp | 43 ++++++----- flang/runtime/edit-output.cpp | 5 +- 4 files changed, 31 insertions(+), 103 deletions(-) delete mode 100644 flang/include/flang/Common/unsigned-const-division.h diff --git a/flang/include/flang/Common/unsigned-const-division.h b/flang/include/flang/Common/unsigned-const-division.h deleted file mode 100644 index 0799edbe0ef99..0000000000000 --- a/flang/include/flang/Common/unsigned-const-division.h +++ /dev/null @@ -1,77 +0,0 @@ -//===-- include/flang/Common/unsigned-const-division.h ----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_ -#define FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_ - -// Work around unoptimized implementations of unsigned integer division -// by constant values in some compilers (looking at YOU, clang 7!) by -// explicitly implementing integer division by constant divisors as -// multiplication by a fixed-point reciprocal and a right shift. - -#include "bit-population-count.h" -#include "leading-zero-bit-count.h" -#include "uint128.h" -#include -#include - -namespace Fortran::common { - -template class FixedPointReciprocal { -public: - using type = UINT; - -private: - static_assert(std::is_unsigned_v); - static const int bits{static_cast(8 * sizeof(type))}; - static_assert(bits <= 64); - using Big = HostUnsignedIntType; - -public: - static constexpr FixedPointReciprocal For(type n) { - if (n == 0) { - return {0, 0}; - } else if ((n & (n - 1)) == 0) { // n is a power of two - return {TrailingZeroBitCount(n), 1}; - } else { - int shift{bits - 1 + BitsNeededFor(n)}; - return {shift, static_cast(((Big{1} << shift) + n - 1) / n)}; - } - } - - constexpr type Divide(type n) const { - return static_cast((static_cast(reciprocal_) * n) >> shift_); - } - -private: - constexpr FixedPointReciprocal(int s, type r) : shift_{s}, reciprocal_{r} {} - - int shift_; - type reciprocal_; -}; - -static_assert(FixedPointReciprocal::For(5).Divide(2000000000u) == - 400000000u); -static_assert(FixedPointReciprocal::For(10).Divide( - 10000000000000000u) == 1000000000000000u); - -template -inline constexpr UINT DivideUnsignedBy(UINT n) { - if constexpr (std::is_same_v) { - return n / static_cast(DENOM); - } else { - // G++ can recognize that the reciprocal is a compile-time - // constant when For() is called inline, but clang requires - // a constexpr variable definition to force compile-time - // evaluation of the reciprocal. - constexpr auto recip{FixedPointReciprocal::For(DENOM)}; - return recip.Divide(n); - } -} -} // namespace Fortran::common -#endif diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h index b0ee69ad5e426..4ae417cd9263e 100644 --- a/flang/lib/Decimal/big-radix-floating-point.h +++ b/flang/lib/Decimal/big-radix-floating-point.h @@ -24,7 +24,6 @@ #include "flang/Common/bit-population-count.h" #include "flang/Common/leading-zero-bit-count.h" #include "flang/Common/uint128.h" -#include "flang/Common/unsigned-const-division.h" #include "flang/Decimal/binary-floating-point.h" #include "flang/Decimal/decimal.h" #include @@ -147,7 +146,7 @@ template class BigRadixFloatingPointNumber { std::is_same_v || std::is_unsigned_v); SetToZero(); while (n != 0) { - auto q{common::DivideUnsignedBy(n)}; + auto q{n / 10u}; if (n != q * 10) { break; } @@ -161,7 +160,7 @@ template class BigRadixFloatingPointNumber { return 0; } else { while (n != 0 && digits_ < digitLimit_) { - auto q{common::DivideUnsignedBy(n)}; + auto q{n / radix}; digit_[digits_++] = static_cast(n - q * radix); n = q; } @@ -214,7 +213,7 @@ template class BigRadixFloatingPointNumber { template int DivideBy() { Digit remainder{0}; for (int j{digits_ - 1}; j >= 0; --j) { - Digit q{common::DivideUnsignedBy(digit_[j])}; + Digit q{digit_[j] / DIVISOR}; Digit nrem{digit_[j] - DIVISOR * q}; digit_[j] = q + (radix / DIVISOR) * remainder; remainder = nrem; @@ -295,7 +294,7 @@ template class BigRadixFloatingPointNumber { template int MultiplyByHelper(int carry = 0) { for (int j{0}; j < digits_; ++j) { auto v{N * digit_[j] + carry}; - carry = common::DivideUnsignedBy(v); + carry = v / radix; digit_[j] = v - carry * radix; // i.e., v % radix } return carry; diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp index c89bffc8ccd4c..af233d586941b 100644 --- a/flang/lib/Decimal/binary-to-decimal.cpp +++ b/flang/lib/Decimal/binary-to-decimal.cpp @@ -100,28 +100,35 @@ BigRadixFloatingPointNumber::ConvertToDecimal(char *buffer, "4041424344454647484950515253545556575859" "6061626364656667686970717273747576777879" "8081828384858687888990919293949596979899"; - static constexpr Digit hundredth{radix / 100}; // Treat the MSD specially: don't emit leading zeroes. Digit dig{digit_[digits_ - 1]}; - for (int k{0}; k < LOG10RADIX; k += 2) { - Digit d{common::DivideUnsignedBy(dig)}; - dig = 100 * (dig - d * hundredth); - const char *q{lut + 2 * d}; - if (q[0] != '0' || p > start) { - *p++ = q[0]; - *p++ = q[1]; - } else if (q[1] != '0') { - *p++ = q[1]; - } + char stack[LOG10RADIX], *sp{stack}; + for (int k{0}; k < log10Radix; k += 2) { + Digit newDig{dig / 100}; + auto d{static_cast(dig) - + std::uint32_t{100} * static_cast(newDig)}; + dig = newDig; + const char *q{lut + d + d}; + *sp++ = q[1]; + *sp++ = q[0]; + } + while (sp > stack && sp[-1] == '0') { + --sp; + } + while (sp > stack) { + *p++ = *--sp; } for (int j{digits_ - 1}; j-- > 0;) { Digit dig{digit_[j]}; + char *reverse{p += log10Radix}; for (int k{0}; k < log10Radix; k += 2) { - Digit d{common::DivideUnsignedBy(dig)}; - dig = 100 * (dig - d * hundredth); - const char *q{lut + 2 * d}; - *p++ = q[0]; - *p++ = q[1]; + Digit newDig{dig / 100}; + auto d{static_cast(dig) - + std::uint32_t{100} * static_cast(newDig)}; + dig = newDig; + const char *q{lut + d + d}; + *--reverse = q[1]; + *--reverse = q[0]; } } // Adjust exponent so the effective decimal point is to @@ -251,9 +258,9 @@ void BigRadixFloatingPointNumber::Minimize( Digit least{less.digit_[offset]}; Digit my{digit_[0]}; while (true) { - Digit q{common::DivideUnsignedBy(my)}; + Digit q{my / 10u}; Digit r{my - 10 * q}; - Digit lq{common::DivideUnsignedBy(least)}; + Digit lq{least / 10u}; Digit lr{least - 10 * lq}; if (r != 0 && lq == q) { Digit sub{(r - lr) >> 1}; diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp index 4d27cb6320df0..bae3606689e7d 100644 --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -8,7 +8,6 @@ #include "edit-output.h" #include "flang/Common/uint128.h" -#include "flang/Common/unsigned-const-division.h" #include namespace Fortran::runtime::io { @@ -32,7 +31,7 @@ bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit, INT n) { signChars = 1; // '-' or '+' } while (un > 0) { - auto quotient{common::DivideUnsignedBy(un)}; + auto quotient{un / 10u}; *--p = '0' + static_cast(un - UINT{10} * quotient); un = quotient; } @@ -99,7 +98,7 @@ const char *RealOutputEditingBase::FormatExponent( char *eEnd{&exponent_[sizeof exponent_]}; char *exponent{eEnd}; for (unsigned e{static_cast(std::abs(expo))}; e > 0;) { - unsigned quotient{common::DivideUnsignedBy(e)}; + unsigned quotient{e / 10u}; *--exponent = '0' + e - 10 * quotient; e = quotient; } From caeb13aba853b949ca45627f023dbeac77c13b2f Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 1 Oct 2020 15:44:12 -0700 Subject: [PATCH 328/544] [AMDGPU] Allow SOP asm mnemonic to differ Allows the creation of real SOP1 instructions with assembler mnemonics that differ from their pseudo-instruction mnemonics. The default behavior keeps the mnemonics matching. Corrects a subtarget label typo in a comment. Authored By: Joe_Nash Differential Revision: https://reviews.llvm.org/D88708 --- llvm/lib/Target/AMDGPU/SOPInstructions.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index ecfe4c79063f6..76257ed1584be 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -54,9 +54,9 @@ class SOP1_Pseudo has_sdst = 1; } -class SOP1_Real op, SOP1_Pseudo ps> : +class SOP1_Real op, SOP1_Pseudo ps, string real_name = ps.Mnemonic> : InstSI , + real_name # " " # ps.AsmOperands, []>, Enc32 { let isPseudo = 0; @@ -1621,7 +1621,7 @@ defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>; defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>; //===----------------------------------------------------------------------===// -// GFX8, GFX9 (VI). +// GFX8 (VI), GFX9. //===----------------------------------------------------------------------===// class Select_vi : From de3cb9548d77726186db2d384193e0565cb0afc5 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 1 Oct 2020 16:08:44 -0700 Subject: [PATCH 329/544] Fix a bug in memset formation with vectors of non-integral pointers We were converting the non-integral store into a integer store which is not legal. --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 10 +++--- .../LoopIdiom/non-integral-pointers.ll | 32 +++++++++++++++---- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 837ef869ccf0f..38094377141fd 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -455,11 +455,6 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { if (!SI->isUnordered()) return LegalStoreKind::None; - // Don't convert stores of non-integral pointer types to memsets (which stores - // integers). - if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType())) - return LegalStoreKind::None; - // Avoid merging nontemporal stores. if (SI->getMetadata(LLVMContext::MD_nontemporal)) return LegalStoreKind::None; @@ -467,6 +462,11 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); + // Don't convert stores of non-integral pointer types to memsets (which stores + // integers). + if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + return LegalStoreKind::None; + // Reject stores that are so large that they overflow an unsigned. // When storing out scalable vectors we bail out for now, since the code // below currently only works for constant strides. diff --git a/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll b/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll index 6846e88253940..c8271306535b8 100644 --- a/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll +++ b/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll @@ -3,13 +3,12 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" target triple = "x86_64-unknown-linux-gnu" +; LIR'ing stores of pointers with address space 3 is fine, since +; they're integral pointers. define void @f_0(i8 addrspace(3)** %ptr) { ; CHECK-LABEL: @f_0( ; CHECK: call{{.*}}memset -; LIR'ing stores of pointers with address space 3 is fine, since -; they're integral pointers. - entry: br label %for.body @@ -25,13 +24,14 @@ for.end: ret void } +; LIR'ing stores of pointers with address space 4 is not ok, since +; they're non-integral pointers. NOTE: Zero is special value which +; can be converted, if we add said handling here, convert this test +; to use any non-null pointer. define void @f_1(i8 addrspace(4)** %ptr) { ; CHECK-LABEL: @f_1( ; CHECK-NOT: call{{.*}}memset -; LIR'ing stores of pointers with address space 4 is not ok, since -; they're non-integral pointers. - entry: br label %for.body @@ -46,3 +46,23 @@ for.body: for.end: ret void } + +; Same as previous case, but vector of non-integral pointers +define void @f_2(i8 addrspace(4)** %ptr) { +; CHECK-LABEL: @f_2( +; CHECK-NOT: call{{.*}}memset +entry: + br label %for.body + +for.body: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] + %arrayidx = getelementptr i8 addrspace(4)*, i8 addrspace(4)** %ptr, i64 %indvar + %addr = bitcast i8 addrspace(4)** %arrayidx to <2 x i8 addrspace(4)*>* + store <2 x i8 addrspace(4)*> zeroinitializer, <2 x i8 addrspace(4)*>* %addr, align 8 + %indvar.next = add i64 %indvar, 2 + %exitcond = icmp eq i64 %indvar.next, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} From aab6f7db471d577d313f334cba37667c35158420 Mon Sep 17 00:00:00 2001 From: Muhammad Asif Manzoor Date: Thu, 1 Oct 2020 19:39:48 -0400 Subject: [PATCH 330/544] [AArch64][SVE] Add lowering for llvm fabs Add the functionality to lower fabs for passthru variant Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D88679 --- .../Target/AArch64/AArch64ISelLowering.cpp | 8 +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 13 ++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 21 ++---- llvm/test/CodeGen/AArch64/sve-fp.ll | 69 +++++++++++++++++++ 5 files changed, 92 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fb70b2d801da0..d7d326fa019dc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -191,6 +191,7 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::FCVTZS_MERGE_PASSTHRU: case AArch64ISD::FSQRT_MERGE_PASSTHRU: case AArch64ISD::FRECPX_MERGE_PASSTHRU: + case AArch64ISD::FABS_MERGE_PASSTHRU: return true; } } @@ -1054,6 +1055,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::FSQRT, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); setOperationAction(ISD::FP_EXTEND, VT, Custom); setOperationAction(ISD::FP_ROUND, VT, Custom); } @@ -1592,6 +1594,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) MAKE_CASE(AArch64ISD::SBC) @@ -3521,6 +3524,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_frecpx: return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_fabs: + return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_convert_to_svbool: { EVT OutVT = Op.getValueType(); EVT InVT = Op.getOperand(1).getValueType(); @@ -3834,6 +3840,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU); case ISD::FSQRT: return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU); + case ISD::FABS: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU); case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 1b8f62e427dbb..dc23fb838f970 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -95,6 +95,7 @@ enum NodeType : unsigned { // Predicated instructions with the result of inactive lanes provided by the // last operand. + FABS_MERGE_PASSTHRU, FCEIL_MERGE_PASSTHRU, FFLOOR_MERGE_PASSTHRU, FNEARBYINT_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e2c8eb9115cfa..d0b526ee47554 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -201,9 +201,10 @@ def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ ]>; // Predicated operations with the result of inactive lanes provided by the last operand. -def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; -def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; +def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; +def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; @@ -211,7 +212,7 @@ def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Ari def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>; def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ @@ -378,8 +379,8 @@ let Predicates = [HasSVE] in { defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>; defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", int_aarch64_sve_not>; - defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs, null_frag>; - defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", null_frag, AArch64fneg_mt>; + defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; + defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 45a712c897a44..7d5a0695035ea 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3802,24 +3802,17 @@ multiclass sve_int_un_pred_arit_1 opc, string asm, def : SVE_3_Op_Pat(NAME # _D)>; } -// TODO: Remove int_op once its last use is converted to ir_op. -multiclass sve_int_un_pred_arit_1_fp opc, string asm, - SDPatternOperator int_op, - SDPatternOperator ir_op> { +multiclass sve_int_un_pred_arit_1_fp opc, string asm, SDPatternOperator op> { def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 5334e66b22f7e..7ca1fdee7f32f 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -542,6 +542,68 @@ define @fsqrt_nxv2f64( %a) { ret %res } +; FABS + +define @fabs_nxv8f16( %a) { +; CHECK-LABEL: fabs_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fabs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.fabs.nxv8f16( %a) + ret %res +} + +define @fabs_nxv4f16( %a) { +; CHECK-LABEL: fabs_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fabs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.fabs.nxv4f16( %a) + ret %res +} + +define @fabs_nxv2f16( %a) { +; CHECK-LABEL: fabs_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fabs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.fabs.nxv2f16( %a) + ret %res +} + +define @fabs_nxv4f32( %a) { +; CHECK-LABEL: fabs_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fabs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.fabs.nxv4f32( %a) + ret %res +} + +define @fabs_nxv2f32( %a) { +; CHECK-LABEL: fabs_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fabs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.fabs.nxv2f32( %a) + ret %res +} + +define @fabs_nxv2f64( %a) { +; CHECK-LABEL: fabs_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fabs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.fabs.nxv2f64( %a) + ret %res +} + declare @llvm.aarch64.sve.frecps.x.nxv8f16(, ) declare @llvm.aarch64.sve.frecps.x.nxv4f32( , ) declare @llvm.aarch64.sve.frecps.x.nxv2f64(, ) @@ -564,5 +626,12 @@ declare @llvm.sqrt.nxv4f32() declare @llvm.sqrt.nxv2f32() declare @llvm.sqrt.nxv2f64() +declare @llvm.fabs.nxv8f16( ) +declare @llvm.fabs.nxv4f16( ) +declare @llvm.fabs.nxv2f16( ) +declare @llvm.fabs.nxv4f32() +declare @llvm.fabs.nxv2f32() +declare @llvm.fabs.nxv2f64() + ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2 From bb0344644a656734d707ab9c0baf6eb0533ac905 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 1 Oct 2020 16:44:12 -0700 Subject: [PATCH 331/544] [memcpyopt] Conservatively handle non-integral pointers If we allow the non-integral pointers to become memset and memcpy, we loose the ability to reason about pointer propagation. This patch is modeled on changes we've carried downstream for a long time, figured it was worth being equally conservative for other users. There is room to refine the semantics and handling here if anyone is motivated. --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 18 ++++++++-- .../test/Transforms/MemCpyOpt/non-integral.ll | 36 +++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/MemCpyOpt/non-integral.ll diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0d66092a70359..01f3c322b1f49 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -352,8 +352,15 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; + Value *StoredVal = NextStore->getValueOperand(); + + // Don't convert stores of non-integral pointer types to memsets (which + // stores integers). + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + break; + // Check to see if this stored value is of the same byte-splattable value. - Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL); + Value *StoredByte = isBytewiseValue(StoredVal, DL); if (isa(ByteVal) && StoredByte) ByteVal = StoredByte; if (ByteVal != StoredByte) @@ -556,8 +563,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { const DataLayout &DL = SI->getModule()->getDataLayout(); + Value *StoredVal = SI->getValueOperand(); + + // Not all the transforms below are correct for non-integral pointers, bail + // until we've audited the individual pieces. + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) + return false; + // Load to store forwarding can be interpreted as memcpy. - if (LoadInst *LI = dyn_cast(SI->getOperand(0))) { + if (LoadInst *LI = dyn_cast(StoredVal)) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { diff --git a/llvm/test/Transforms/MemCpyOpt/non-integral.ll b/llvm/test/Transforms/MemCpyOpt/non-integral.ll new file mode 100644 index 0000000000000..eecbea32adb57 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/non-integral.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -memcpyopt -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128-ni:1" + +define void @illegal_memset(i64 addrspace(1)** %p) { +; CHECK-LABEL: @illegal_memset( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = bitcast i64 addrspace(1)** [[P:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P1]], i8 0, i64 8, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64 addrspace(1)*, i64 addrspace(1)** [[P]], i64 1 +; CHECK-NEXT: store i64 addrspace(1)* null, i64 addrspace(1)** [[GEP]], align 8 +; CHECK-NEXT: ret void +; +entry: + %p1 = bitcast i64 addrspace(1)** %p to i8* + call void @llvm.memset.p0i8.i64(i8* %p1, i8 0, i64 8, i32 0, i1 false) + %gep = getelementptr i64 addrspace(1)*, i64 addrspace(1)** %p, i64 1 + store i64 addrspace(1)* null, i64 addrspace(1)** %gep + ret void +} + +define void @illegal_memcpy(<2 x i8 addrspace(1)*>* noalias align 16 %a, +; CHECK-LABEL: @illegal_memcpy( +; CHECK-NEXT: [[VAL:%.*]] = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*>* [[A:%.*]], align 16 +; CHECK-NEXT: store <2 x i8 addrspace(1)*> [[VAL]], <2 x i8 addrspace(1)*>* [[B:%.*]], align 16 +; CHECK-NEXT: ret void +; + <2 x i8 addrspace(1)*>* noalias align 16 %b) { + %val = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*>* %a, align 16 + store <2 x i8 addrspace(1)*> %val, <2 x i8 addrspace(1)*>* %b, align 16 + ret void +} + +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)*, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) From 75a5ec1bad18ae1d741830cc46946da00fed6ed9 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 12:26:10 -0700 Subject: [PATCH 332/544] [flang][msvc] Rework a MSVC work-around to avoid clang warning A recent MSVC work-around patch is eliciting unused variable warnings from clang; package the lambda reference arguments into a struct to avoid the warning. Differential revision: https://reviews.llvm.org/D88695 --- flang/lib/Evaluate/fold-implementation.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h index 8178b277d13b0..f89cbf7872204 100644 --- a/flang/lib/Evaluate/fold-implementation.h +++ b/flang/lib/Evaluate/fold-implementation.h @@ -1154,14 +1154,20 @@ Expr FoldOperation( if (auto array{ApplyElementwise(context, convert)}) { return *array; } + struct { + FoldingContext &context; + Convert &convert; + } msvcWorkaround{context, convert}; return std::visit( - [&context, &convert](auto &kindExpr) -> Expr { + [&msvcWorkaround](auto &kindExpr) -> Expr { using Operand = ResultType; // This variable is a workaround for msvc which emits an error when // using the FROMCAT template parameter below. TypeCategory constexpr FromCat{FROMCAT}; + auto &convert{msvcWorkaround.convert}; char buffer[64]; if (auto value{GetScalarConstantValue(kindExpr)}) { + FoldingContext &context{msvcWorkaround.context}; if constexpr (TO::category == TypeCategory::Integer) { if constexpr (Operand::category == TypeCategory::Integer) { auto converted{Scalar::ConvertSigned(*value)}; From 61687f3a48c254436cbdd55e10bfb23b727f3eb5 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 30 Sep 2020 13:10:17 -0700 Subject: [PATCH 333/544] [flang] Fix buffering read->write transition The buffer needs to be Reset() after a Flush(), since the Flush() can be a no-op after a read->write transition. And record numbers are 1-based, not 0-based. This fixes a bug with rewrites of records that have been recently read. Differential revision: https://reviews.llvm.org/D88612 --- flang/runtime/buffer.h | 2 +- flang/runtime/io-api.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/runtime/buffer.h b/flang/runtime/buffer.h index 27e0ac31579d7..c5bd5aedaaee8 100644 --- a/flang/runtime/buffer.h +++ b/flang/runtime/buffer.h @@ -94,7 +94,7 @@ template class FileFrame { start_ + (at - fileOffset_) + static_cast(bytes) > size_) { Flush(handler); - fileOffset_ = at; + Reset(at); Reallocate(bytes, handler); } dirty_ = true; diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index edd338af0fa77..0dcfabd2d8ced 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -518,7 +518,7 @@ bool IONAME(SetRec)(Cookie cookie, std::int64_t rec) { } connection.currentRecordNumber = rec; if (auto *unit{io.GetExternalFileUnit()}) { - unit->SetPosition(rec * *connection.recordLength); + unit->SetPosition((rec - 1) * *connection.recordLength); } return true; } From 78a9e62aa6f8f39fe8141e5486fca6db29947ecf Mon Sep 17 00:00:00 2001 From: jasonliu Date: Thu, 1 Oct 2020 23:35:31 +0000 Subject: [PATCH 334/544] [XCOFF] Enable -fdata-sections on AIX Summary: Some design decision worth noting about: I've noticed a recent mailing discussing about why string literal is not affected by -fdata-sections for ELF target: http://lists.llvm.org/pipermail/llvm-dev/2020-September/145121.html But on AIX, our linker could not split the mergeable string like other target. So I think it would make more sense for us to emit separate csect for every mergeable string in -fdata-sections mode, as there might not be other ways for linker to do garbage collection on unused mergeable string. Reviewed By: daltenty, hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D88339 --- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 50 +-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 7 +- .../PowerPC/aix-xcoff-data-sections.ll | 312 ++++++++++++++++++ 3 files changed, 347 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-data-sections.ll diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 676a465c49e2c..92ffbec168ebd 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2055,11 +2055,10 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( MCSymbol * TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, const TargetMachine &TM) const { - if (TM.getDataSections()) - report_fatal_error("XCOFF unique data sections not yet implemented"); - // We always use a qualname symbol for a GV that represents // a declaration, a function descriptor, or a common symbol. + // If a GV represents a GlobalVariable and -fdata-sections is enabled, we + // also return a qualname so that a label symbol could be avoided. // It is inherently ambiguous when the GO represents the address of a // function, as the GO could either represent a function descriptor or a // function entry point. We choose to always return a function descriptor @@ -2074,15 +2073,12 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, return cast( getSectionForFunctionDescriptor(cast(GO), TM)) ->getQualNameSymbol(); - if (GOKind.isCommon() || GOKind.isBSSLocal()) + if (TM.getDataSections() || GOKind.isCommon() || GOKind.isBSSLocal()) return cast(SectionForGlobal(GO, GOKind, TM)) ->getQualNameSymbol(); } // For all other cases, fall back to getSymbol to return the unqualified name. - // This could change for a GV that is a GlobalVariable when we decide to - // support -fdata-sections since we could avoid having label symbols if the - // linkage name is applied to the csect symbol. return nullptr; } @@ -2107,9 +2103,6 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference( MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - assert(!TM.getDataSections() && - "XCOFF unique data sections not yet implemented."); - // Common symbols go into a csect with matching name which will get mapped // into the .bss section. if (Kind.isBSSLocal() || Kind.isCommon()) { @@ -2129,6 +2122,9 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( SmallString<128> Name; Name = SizeSpec + utostr(Alignment.value()); + if (TM.getDataSections()) + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD, Kind, /*BeginSymbolName*/ nullptr); } @@ -2141,20 +2137,32 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( return TextSection; } - if (Kind.isData() || Kind.isReadOnlyWithRel()) - // TODO: We may put this under option control, because user may want to - // have read-only data with relocations placed into a read-only section by - // the compiler. - return DataSection; - - // Zero initialized data must be emitted to the .data section because external - // linkage control sections that get mapped to the .bss section will be linked - // as tentative defintions, which is only appropriate for SectionKind::Common. - if (Kind.isBSS()) + // TODO: We may put Kind.isReadOnlyWithRel() under option control, because + // user may want to have read-only data with relocations placed into a + // read-only section by the compiler. + // For BSS kind, zero initialized data must be emitted to the .data section + // because external linkage control sections that get mapped to the .bss + // section will be linked as tentative defintions, which is only appropriate + // for SectionKind::Common. + if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) { + if (TM.getDataSections()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection(Name, XCOFF::XMC_RW, XCOFF::XTY_SD, + SectionKind::getData()); + } return DataSection; + } - if (Kind.isReadOnly()) + if (Kind.isReadOnly()) { + if (TM.getDataSections()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD, + SectionKind::getReadOnly()); + } return ReadOnlySection; + } report_fatal_error("XCOFF other section types not yet implemented."); } diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index b0ac8095c9877..4641f8e1c94de 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1809,11 +1809,16 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { MCSymbol *EmittedInitSym = GVSym; emitLinkage(GV, EmittedInitSym); emitAlignment(getGVAlignment(GV, DL), GV); - OutStreamer->emitLabel(EmittedInitSym); + // When -fdata-sections is enabled, every GlobalVariable will + // be put into its own csect; therefore, label is not necessary here. + if (!TM.getDataSections()) + OutStreamer->emitLabel(EmittedInitSym); + // Emit aliasing label for global variable. llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) { OutStreamer->emitLabel(getSymbol(Alias)); }); + emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); } diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data-sections.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data-sections.ll new file mode 100644 index 0000000000000..264c7b497863f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data-sections.ll @@ -0,0 +1,312 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff -data-sections < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,CHECK32 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff -data-sections < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,CHECK64 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff -filetype=obj -data-sections -o %t.o < %s +; RUN: llvm-objdump -D --symbol-description %t.o | FileCheck --check-prefix=CHECKOBJ %s +; RUN: llvm-readobj -syms %t.o | FileCheck --check-prefix=CHECKSYM %s + +@ivar = local_unnamed_addr global i32 35, align 4 +@const_ivar = constant i32 35, align 4 + +@a = common global i32 0, align 4 +@f = common local_unnamed_addr global i32 0, align 4 + +@.str = private unnamed_addr constant [9 x i8] c"abcdefgh\00", align 1 +@p = global i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), align 4 + +define i8 @foo() { +entry: + %0 = load i8*, i8** @p, align 4 + %1 = load i8, i8* %0, align 1 + ret i8 %1 +} + +define i32 @bar() { +entry: + %0 = load i32, i32* @ivar, align 4 + %1 = load i32, i32* @const_ivar, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* @a, align 4 + %add1 = add nsw i32 %add, %2 + %3 = load i32, i32* @f, align 4 + %add2 = add nsw i32 %add1, %3 + ret i32 %add2 +} + + +; CHECK: .csect ivar[RW],2 +; CHECK-NEXT: .globl ivar[RW] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, 35 # 0x23 +; CHECK-NEXT: .csect const_ivar[RO],2 +; CHECK-NEXT: .globl const_ivar[RO] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, 35 # 0x23 +; CHECK-NEXT: .comm a[RW],4,2 +; CHECK-NEXT: .comm f[RW],4,2 +; CHECK-NEXT: .csect .rodata.str1.1L...str[RO],2 +; CHECK-NEXT: .byte 'a,'b,'c,'d,'e,'f,'g,'h,0000 +; CHECK32: .csect p[RW],2 +; CHECK32-NEXT: .globl p[RW] +; CHECK32-NEXT: .align 2 +; CHECK32-NEXT: .vbyte 4, .rodata.str1.1L...str[RO] +; CHECK64: .csect p[RW],3 +; CHECK64-NEXT: .globl p[RW] +; CHECK64-NEXT: .align 3 +; CHECK64-NEXT: .vbyte 8, .rodata.str1.1L...str[RO] +; CHECK: .toc +; CHECK-NEXT: L..C0: +; CHECK-NEXT: .tc p[TC],p[RW] +; CHECK-NEXT: L..C1: +; CHECK-NEXT: .tc ivar[TC],ivar[RW] +; CHECK-NEXT: L..C2: +; CHECK-NEXT: .tc a[TC],a[RW] +; CHECK-NEXT: L..C3: +; CHECK-NEXT: .tc f[TC],f[RW] + +; CHECKOBJ: 00000038 (idx: 6) const_ivar[RO]: +; CHECKOBJ-NEXT: 38: 00 00 00 23 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 0000003c (idx: 8) .rodata.str1.1L...str[RO]: +; CHECKOBJ-NEXT: 3c: 61 62 63 64 +; CHECKOBJ-NEXT: 40: 65 66 67 68 +; CHECKOBJ-NEXT: 44: 00 00 00 00 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: Disassembly of section .data: +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 00000048 (idx: 10) ivar[RW]: +; CHECKOBJ-NEXT: 48: 00 00 00 23 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 0000004c (idx: 12) p[RW]: +; CHECKOBJ-NEXT: 4c: 00 00 00 3c +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 00000050 (idx: 14) foo[DS]: +; CHECKOBJ-NEXT: 50: 00 00 00 00 +; CHECKOBJ-NEXT: 54: 00 00 00 68 +; CHECKOBJ-NEXT: 58: 00 00 00 00 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 0000005c (idx: 16) bar[DS]: +; CHECKOBJ-NEXT: 5c: 00 00 00 10 +; CHECKOBJ-NEXT: 60: 00 00 00 68 +; CHECKOBJ-NEXT: 64: 00 00 00 00 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 00000068 (idx: 20) p[TC]: +; CHECKOBJ-NEXT: 68: 00 00 00 4c +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 0000006c (idx: 22) ivar[TC]: +; CHECKOBJ-NEXT: 6c: 00 00 00 48 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 00000070 (idx: 24) a[TC]: +; CHECKOBJ-NEXT: 70: 00 00 00 78 +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 00000074 (idx: 26) f[TC]: +; CHECKOBJ-NEXT: 74: 00 00 00 7c +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: Disassembly of section .bss: +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 00000078 (idx: 28) a[RW]: +; CHECKOBJ-NEXT: ... +; CHECKOBJ-EMPTY: +; CHECKOBJ-NEXT: 0000007c (idx: 30) f[RW]: +; CHECKOBJ-NEXT: ... + + +; CHECKSYM: Symbol { +; CHECKSYM: Name: const_ivar +; CHECKSYM: Value (RelocatableAddress): 0x38 +; CHECKSYM: Section: .text +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_EXT (0x2) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_RO (0x1) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: .rodata.str1.1L...str +; CHECKSYM: Value (RelocatableAddress): 0x3C +; CHECKSYM: Section: .text +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 9 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_RO (0x1) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: ivar +; CHECKSYM: Value (RelocatableAddress): 0x48 +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_EXT (0x2) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: p +; CHECKSYM: Value (RelocatableAddress): 0x4C +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_EXT (0x2) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: TOC +; CHECKSYM: Value (RelocatableAddress): 0x68 +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 0 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_TC0 (0xF) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: p +; CHECKSYM: Value (RelocatableAddress): 0x68 +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: ivar +; CHECKSYM: Value (RelocatableAddress): 0x6C +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: a +; CHECKSYM: Value (RelocatableAddress): 0x70 +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: f +; CHECKSYM: Value (RelocatableAddress): 0x74 +; CHECKSYM: Section: .data +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_SD (0x1) +; CHECKSYM: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: a +; CHECKSYM: Value (RelocatableAddress): 0x78 +; CHECKSYM: Section: .bss +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_EXT (0x2) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_CM (0x3) +; CHECKSYM: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } +; CHECKSYM: Symbol { +; CHECKSYM: Name: f +; CHECKSYM: Value (RelocatableAddress): 0x7C +; CHECKSYM: Section: .bss +; CHECKSYM: Type: 0x0 +; CHECKSYM: StorageClass: C_EXT (0x2) +; CHECKSYM: NumberOfAuxEntries: 1 +; CHECKSYM: CSECT Auxiliary Entry { +; CHECKSYM: SectionLen: 4 +; CHECKSYM: ParameterHashIndex: 0x0 +; CHECKSYM: TypeChkSectNum: 0x0 +; CHECKSYM: SymbolAlignmentLog2: 2 +; CHECKSYM: SymbolType: XTY_CM (0x3) +; CHECKSYM: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM: StabInfoIndex: 0x0 +; CHECKSYM: StabSectNum: 0x0 +; CHECKSYM: } +; CHECKSYM: } From a94d943f1a3f42efede7e908bb250c84f9f442b1 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 10:59:09 -0700 Subject: [PATCH 335/544] [flang] Fix actions at end of output record It turns out that unformatted fixed-size output records do need to be padded out if short, in order to avoid a spurious EOF crash on a short record at the end of the file. While here in AdvanceRecord(), move the unformatted variable-length record header/footer writing code to here from EndIoStatement(). Differential revision: https://reviews.llvm.org/D88685 --- flang/runtime/io-stmt.cpp | 26 -------------------------- flang/runtime/io-stmt.h | 1 - flang/runtime/unit.cpp | 33 +++++++++++++++++++++++++-------- 3 files changed, 25 insertions(+), 35 deletions(-) diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 45b5f2a95060d..7474dd94b982a 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -698,32 +698,6 @@ bool UnformattedIoStatementState::Emit( return ExternalIoStatementState::Emit(data, bytes, elementBytes); } -template -int UnformattedIoStatementState::EndIoStatement() { - ExternalFileUnit &unit{this->unit()}; - if constexpr (DIR == Direction::Output) { - if (unit.access == Access::Sequential && !unit.isFixedRecordLength) { - // Append the length of a sequential unformatted variable-length record - // as its footer, then overwrite the reserved first four bytes of the - // record with its length as its header. These four bytes were skipped - // over in BeginUnformattedOutput(). - // TODO: Break very large records up into subrecords with negative - // headers &/or footers - union { - std::uint32_t u; - char c[sizeof u]; - } u; - u.u = unit.furthestPositionInRecord - sizeof u; - // TODO: Convert record length to little-endian on big-endian host? - if (!(this->Emit(u.c, sizeof u) && - (this->HandleAbsolutePosition(0), this->Emit(u.c, sizeof u)))) { - return false; - } - } - } - return ExternalIoStatementState::EndIoStatement(); -} - template class InternalIoStatementState; template class InternalIoStatementState; template class InternalFormattedIoStatementState; diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 343619bc121cb..b5d3caff04f00 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -322,7 +322,6 @@ class UnformattedIoStatementState : public ExternalIoStatementState { using ExternalIoStatementState::ExternalIoStatementState; bool Receive(char *, std::size_t, std::size_t elementBytes = 0); bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); - int EndIoStatement(); }; class OpenStatementState : public ExternalIoStatementBase { diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 77b7a74551d8f..85d83ec50bd9c 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -406,15 +406,32 @@ bool ExternalFileUnit::AdvanceRecord(IoErrorHandler &handler) { FinishReadingRecord(handler); BeginReadingRecord(handler); } else { // Direction::Output - if (!isUnformatted) { - if (isFixedRecordLength && recordLength) { - if (furthestPositionInRecord < *recordLength) { - WriteFrame(frameOffsetInFile_, *recordLength, handler); - std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, - ' ', *recordLength - furthestPositionInRecord); - } + if (isFixedRecordLength && recordLength) { + // Pad remainder of fixed length record + if (furthestPositionInRecord < *recordLength) { + WriteFrame( + frameOffsetInFile_, recordOffsetInFrame_ + *recordLength, handler); + std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, + isUnformatted ? 0 : ' ', *recordLength - furthestPositionInRecord); + } + } else { + positionInRecord = furthestPositionInRecord; + if (isUnformatted) { + // Append the length of a sequential unformatted variable-length record + // as its footer, then overwrite the reserved first four bytes of the + // record with its length as its header. These four bytes were skipped + // over in BeginUnformattedIO(). + // TODO: Break very large records up into subrecords with negative + // headers &/or footers + std::uint32_t length; + length = furthestPositionInRecord - sizeof length; + ok &= Emit(reinterpret_cast(&length), sizeof length, + sizeof length, handler); + positionInRecord = 0; + ok &= Emit(reinterpret_cast(&length), sizeof length, + sizeof length, handler); } else { - positionInRecord = furthestPositionInRecord; + // Terminate formatted variable length record ok &= Emit("\n", 1, 1, handler); // TODO: Windows CR+LF } } From 3261aefc72b3769e8b3eccbb67e1145e195ffa8d Mon Sep 17 00:00:00 2001 From: peter klausler Date: Thu, 1 Oct 2020 12:12:46 -0700 Subject: [PATCH 336/544] [flang] Extend runtime API for PAUSE to allow a stop code Support integer and default character stop codes on PAUSE statements. Add length argument to STOP statement with a character stop code. Differential revision: https://reviews.llvm.org/D88692 --- flang/runtime/stop.cpp | 45 +++++++++++++++++++++++++++++++++--------- flang/runtime/stop.h | 4 +++- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/flang/runtime/stop.cpp b/flang/runtime/stop.cpp index 3b8c1385293ad..d417f44d175fa 100644 --- a/flang/runtime/stop.cpp +++ b/flang/runtime/stop.cpp @@ -64,26 +64,53 @@ static void CloseAllExternalUnits(const char *why) { } [[noreturn]] void RTNAME(StopStatementText)( - const char *code, bool isErrorStop, bool quiet) { + const char *code, std::size_t length, bool isErrorStop, bool quiet) { CloseAllExternalUnits("STOP statement"); if (!quiet) { - std::fprintf( - stderr, "Fortran %s: %s\n", isErrorStop ? "ERROR STOP" : "STOP", code); + std::fprintf(stderr, "Fortran %s: %.*s\n", + isErrorStop ? "ERROR STOP" : "STOP", static_cast(length), code); DescribeIEEESignaledExceptions(); } std::exit(EXIT_FAILURE); } -void RTNAME(PauseStatement)() { +static bool StartPause() { if (Fortran::runtime::io::IsATerminal(0)) { Fortran::runtime::io::IoErrorHandler handler{"PAUSE statement"}; Fortran::runtime::io::ExternalFileUnit::FlushAll(handler); + return true; + } + return false; +} + +static void EndPause() { + std::fflush(nullptr); + if (std::fgetc(stdin) == EOF) { + CloseAllExternalUnits("PAUSE statement"); + std::exit(EXIT_SUCCESS); + } +} + +void RTNAME(PauseStatement)() { + if (StartPause()) { std::fputs("Fortran PAUSE: hit RETURN to continue:", stderr); - std::fflush(nullptr); - if (std::fgetc(stdin) == EOF) { - CloseAllExternalUnits("PAUSE statement"); - std::exit(EXIT_SUCCESS); - } + EndPause(); + } +} + +void RTNAME(PauseStatementInt)(int code) { + if (StartPause()) { + std::fprintf(stderr, "Fortran PAUSE %d: hit RETURN to continue:", code); + EndPause(); + } +} + +void RTNAME(PauseStatementText)(const char *code, std::size_t length) { + if (StartPause()) { + std::fprintf(stderr, + "Fortran PAUSE %.*s: hit RETURN to continue:", static_cast(length), + code); + EndPause(); } } diff --git a/flang/runtime/stop.h b/flang/runtime/stop.h index 3d5f22e5761a2..638fa179edd3d 100644 --- a/flang/runtime/stop.h +++ b/flang/runtime/stop.h @@ -18,9 +18,11 @@ FORTRAN_EXTERN_C_BEGIN // Program-initiated image stop NORETURN void RTNAME(StopStatement)(int code DEFAULT_VALUE(EXIT_SUCCESS), bool isErrorStop DEFAULT_VALUE(false), bool quiet DEFAULT_VALUE(false)); -NORETURN void RTNAME(StopStatementText)(const char *, +NORETURN void RTNAME(StopStatementText)(const char *, size_t, bool isErrorStop DEFAULT_VALUE(false), bool quiet DEFAULT_VALUE(false)); void RTNAME(PauseStatement)(NO_ARGUMENTS); +void RTNAME(PauseStatementInt)(int); +void RTNAME(PauseStatementText)(const char *, size_t); NORETURN void RTNAME(FailImageStatement)(NO_ARGUMENTS); NORETURN void RTNAME(ProgramEndStatement)(NO_ARGUMENTS); From c1dcb573a861dc45be6e4cfc598b340c9079fc1f Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 1 Oct 2020 20:38:48 -0400 Subject: [PATCH 337/544] [flang][openacc] Update loop construct lowering Update the loop construct lowering to support multiple occurences of the same clauses such as private. Add some utility functions used by other constructs. Upstreaming part of https://github.com/flang-compiler/f18-llvm-project/pull/438/ Reviewed By: schweitz Differential Revision: https://reviews.llvm.org/D88253 --- flang/lib/Lower/OpenACC.cpp | 89 +++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index f91aff792cbd4..e1fb724fb92e1 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -30,20 +30,17 @@ getDesignatorNameIfDataRef(const Fortran::parser::Designator &designator) { static void genObjectList(const Fortran::parser::AccObjectList &objectList, Fortran::lower::AbstractConverter &converter, - std::int32_t &objectsCount, - SmallVector &operands) { + SmallVectorImpl &operands) { for (const auto &accObject : objectList.v) { std::visit( Fortran::common::visitors{ [&](const Fortran::parser::Designator &designator) { if (const auto *name = getDesignatorNameIfDataRef(designator)) { - ++objectsCount; const auto variable = converter.getSymbolAddress(*name->symbol); operands.push_back(variable); } }, [&](const Fortran::parser::Name &name) { - ++objectsCount; const auto variable = converter.getSymbolAddress(*name.symbol); operands.push_back(variable); }}, @@ -51,6 +48,45 @@ static void genObjectList(const Fortran::parser::AccObjectList &objectList, } } +static void addOperands(SmallVectorImpl &operands, + SmallVectorImpl &operandSegments, + const SmallVectorImpl &clauseOperands) { + operands.append(clauseOperands.begin(), clauseOperands.end()); + operandSegments.push_back(clauseOperands.size()); +} + +static void addOperand(SmallVectorImpl &operands, + SmallVectorImpl &operandSegments, + const Value &clauseOperand) { + if (clauseOperand) { + operands.push_back(clauseOperand); + operandSegments.push_back(1); + } else { + operandSegments.push_back(0); + } +} + +template +static Op createRegionOp(Fortran::lower::FirOpBuilder &builder, + mlir::Location loc, + const SmallVectorImpl &operands, + const SmallVectorImpl &operandSegments) { + llvm::ArrayRef argTy; + Op op = builder.create(loc, argTy, operands); + builder.createBlock(&op.getRegion()); + auto &block = op.getRegion().back(); + builder.setInsertionPointToStart(&block); + builder.create(loc); + + op.setAttr(Op::getOperandSegmentSizeAttr(), + builder.getI32VectorAttr(operandSegments)); + + // Place the insertion point to the start of the first block. + builder.setInsertionPointToStart(&block); + + return op; +} + static void genACC(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenACCLoopConstruct &loopConstruct) { @@ -73,11 +109,8 @@ static void genACC(Fortran::lower::AbstractConverter &converter, mlir::Value vectorLength; mlir::Value gangNum; mlir::Value gangStatic; - std::int32_t tileOperands = 0; - std::int32_t privateOperands = 0; - std::int32_t reductionOperands = 0; + SmallVector tileOperands, privateOperands, reductionOperands; std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE; - SmallVector operands; // Lower clauses values mapped to operands. for (const auto &clause : accClauseList.v) { @@ -90,7 +123,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, x.t)) { gangNum = converter.genExprValue( *Fortran::semantics::GetExpr(gangNumValue.value())); - operands.push_back(gangNum); } if (const auto &gangStaticValue = std::get>(x.t)) { @@ -107,7 +139,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, currentLocation, firOpBuilder.getIntegerType(32), /* STAR */ -1); } - operands.push_back(gangStatic); } } executionMapping |= mlir::acc::OpenACCExecMapping::GANG; @@ -117,7 +148,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, if (workerClause->v) { workerNum = converter.genExprValue( *Fortran::semantics::GetExpr(*workerClause->v)); - operands.push_back(workerNum); } executionMapping |= mlir::acc::OpenACCExecMapping::WORKER; } else if (const auto *vectorClause = @@ -126,7 +156,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, if (vectorClause->v) { vectorLength = converter.genExprValue( *Fortran::semantics::GetExpr(*vectorClause->v)); - operands.push_back(vectorLength); } executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR; } else if (const auto *tileClause = @@ -136,9 +165,8 @@ static void genACC(Fortran::lower::AbstractConverter &converter, const auto &expr = std::get>( accTileExpr.t); - ++tileOperands; if (expr) { - operands.push_back( + tileOperands.push_back( converter.genExprValue(*Fortran::semantics::GetExpr(*expr))); } else { // * was passed as value and will be represented as a -1 constant @@ -146,33 +174,31 @@ static void genACC(Fortran::lower::AbstractConverter &converter, mlir::Value tileStar = firOpBuilder.createIntegerConstant( currentLocation, firOpBuilder.getIntegerType(32), /* STAR */ -1); - operands.push_back(tileStar); + tileOperands.push_back(tileStar); } } } else if (const auto *privateClause = std::get_if( &clause.u)) { - const Fortran::parser::AccObjectList &accObjectList = privateClause->v; - genObjectList(accObjectList, converter, privateOperands, operands); + genObjectList(privateClause->v, converter, privateOperands); } // Reduction clause is left out for the moment as the clause will probably // end up having its own operation. } - auto loopOp = firOpBuilder.create(currentLocation, argTy, - operands); - - firOpBuilder.createBlock(&loopOp.getRegion()); - auto &block = loopOp.getRegion().back(); - firOpBuilder.setInsertionPointToStart(&block); - // ensure the block is well-formed. - firOpBuilder.create(currentLocation); + // Prepare the operand segement size attribute and the operands value range. + SmallVector operands; + SmallVector operandSegments; + addOperand(operands, operandSegments, gangNum); + addOperand(operands, operandSegments, gangStatic); + addOperand(operands, operandSegments, workerNum); + addOperand(operands, operandSegments, vectorLength); + addOperands(operands, operandSegments, tileOperands); + addOperands(operands, operandSegments, privateOperands); + addOperands(operands, operandSegments, reductionOperands); - loopOp.setAttr(mlir::acc::LoopOp::getOperandSegmentSizeAttr(), - firOpBuilder.getI32VectorAttr( - {gangNum ? 1 : 0, gangStatic ? 1 : 0, workerNum ? 1 : 0, - vectorLength ? 1 : 0, tileOperands, privateOperands, - reductionOperands})); + auto loopOp = createRegionOp( + firOpBuilder, currentLocation, operands, operandSegments); loopOp.setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(), firOpBuilder.getI64IntegerAttr(executionMapping)); @@ -199,9 +225,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, firOpBuilder.getUnitAttr()); } } - - // Place the insertion point to the start of the first block. - firOpBuilder.setInsertionPointToStart(&block); } } From 82453e759c77941cf2281ade79fb9b945b7e9458 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 30 Sep 2020 18:22:53 -0400 Subject: [PATCH 338/544] [OpenMP] Add Missing Runtime Call for Globalization Remarks Summary: Add a missing runtime call to perform data globalization checks. Reviewers: jdoerfert Subscribers: guansong hiraditya llvm-commits sstefan1 yaxunl Tags: #LLVM #OpenMP Differential Revision: https://reviews.llvm.org/D88621 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 37 +++++++++++-------- .../OpenMP/globalization_remarks.ll | 4 +- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d372f108e3d40..f1eb88e5ab343 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -705,24 +705,29 @@ struct OpenMPOpt { } void analysisGlobalization() { - auto &RFI = - OMPInfoCache.RFIs[OMPRTL___kmpc_data_sharing_coalesced_push_stack]; - - auto checkGlobalization = [&](Use &U, Function &Decl) { - if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { - auto Remark = [&](OptimizationRemarkAnalysis ORA) { - return ORA - << "Found thread data sharing on the GPU. " - << "Expect degraded performance due to data globalization."; - }; - emitRemark(CI, "OpenMPGlobalization", - Remark); - } + RuntimeFunction GlobalizationRuntimeIDs[] = { + OMPRTL___kmpc_data_sharing_coalesced_push_stack, + OMPRTL___kmpc_data_sharing_push_stack}; + + for (const auto GlobalizationCallID : GlobalizationRuntimeIDs) { + auto &RFI = OMPInfoCache.RFIs[GlobalizationCallID]; + + auto CheckGlobalization = [&](Use &U, Function &Decl) { + if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA + << "Found thread data sharing on the GPU. " + << "Expect degraded performance due to data globalization."; + }; + emitRemark(CI, "OpenMPGlobalization", + Remark); + } - return false; - }; + return false; + }; - RFI.foreachUse(SCC, checkGlobalization); + RFI.foreachUse(SCC, CheckGlobalization); + } return; } diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll index 49ad1076f6402..77d37736b7f12 100644 --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -59,7 +59,7 @@ entry: br i1 %.not, label %.non-spmd, label %.exit .non-spmd: ; preds = %entry - %1 = tail call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 128, i16 0) #4, !dbg !31 + %1 = tail call i8* @__kmpc_data_sharing_push_stack(i64 128, i16 0) #4, !dbg !31 %2 = bitcast i8* %1 to %struct._globalized_locals_ty* br label %.exit @@ -86,6 +86,8 @@ declare i8 @__kmpc_is_spmd_exec_mode() local_unnamed_addr declare i8* @__kmpc_data_sharing_coalesced_push_stack(i64, i16) local_unnamed_addr +declare i8* @__kmpc_data_sharing_push_stack(i64, i16) local_unnamed_addr + ; Function Attrs: nounwind readnone declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1 From c4690b007743d2f564bc1156fdbdbcaad2adddcc Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Fri, 2 Oct 2020 01:26:18 +0000 Subject: [PATCH 339/544] [PowerPC] Put the CR field in low bits of GRC during copying CRRC to GRC. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: How we copying the CRRC to GRC is using a single MFOCRF to copy the contents of CR field n (CR bits 4×n+32:4×n+35) into bits 4×n+32:4×n+35 of register GRC. That’s not correct because we expect the value of destination register equals to source so we have to put the the contents of CR field in the lowest 4 bits. This patch adds a RLWINM after MFOCRF to achieve that. The problem came up when adding builtins for xvtdivdp, xvtdivsp, xvtsqrtdp, xvtsqrtsp, as posted in D88278. We need to move the outputs (in CR register) to GRC. However outputs of these instructions may not in a fixed CR# register, so we can’t directly add a rotation instruction in the .td patterns, but need to wait until the CR register is determined. Then we confirmed this should be a bug in POST-RA PSEUDO PASS. Reviewed By: nemanjai, shchenz Differential Revision: https://reviews.llvm.org/D88274 --- llvm/lib/Target/PowerPC/PPCInstrHTM.td | 5 ++--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 22 +++++++++++++++------- llvm/test/CodeGen/PowerPC/htm-ttest.ll | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td index 992ad8216f3bd..e59a08774dc58 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td +++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td @@ -164,9 +164,8 @@ def : Pat<(int_ppc_tsuspend), (TSR 0)>; def : Pat<(i64 (int_ppc_ttest)), - (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (TABORTWCI 0, (LI 0), 0), sub_32)), - 36, 28)>; + (i64 (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), (TABORTWCI 0, (LI 0), 0), sub_32))>; } // [HasHTM] diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 469487eb6f7f6..cc0779cac6dd7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1272,14 +1272,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(31); return; } else if (PPC::CRRCRegClass.contains(SrcReg) && - PPC::G8RCRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg).addReg(SrcReg); - getKillRegState(KillSrc); - return; - } else if (PPC::CRRCRegClass.contains(SrcReg) && - PPC::GPRCRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg); + (PPC::G8RCRegClass.contains(DestReg) || + PPC::GPRCRegClass.contains(DestReg))) { + bool Is64Bit = PPC::G8RCRegClass.contains(DestReg); + unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF; + unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM; + unsigned CRNum = TRI->getEncodingValue(SrcReg); + BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg); getKillRegState(KillSrc); + if (CRNum == 7) + return; + // Shift the CR bits to make the CR field in the lowest 4 bits of GRC. + BuildMI(MBB, I, DL, get(ShCode), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(CRNum * 4 + 4) + .addImm(28) + .addImm(31); return; } else if (PPC::G8RCRegClass.contains(SrcReg) && PPC::VSFRCRegClass.contains(DestReg)) { diff --git a/llvm/test/CodeGen/PowerPC/htm-ttest.ll b/llvm/test/CodeGen/PowerPC/htm-ttest.ll index bd9db165f09bf..42c28f6a546b4 100644 --- a/llvm/test/CodeGen/PowerPC/htm-ttest.ll +++ b/llvm/test/CodeGen/PowerPC/htm-ttest.ll @@ -8,7 +8,7 @@ define dso_local void @main() #0 { ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: tabortwci. 0, 3, 0 ; CHECK-NEXT: mfocrf 3, 128 -; CHECK-NEXT: rldicl 3, 3, 36, 28 +; CHECK-NEXT: srwi 3, 3, 28 ; CHECK-NEXT: rlwinm. 3, 3, 31, 30, 31 ; CHECK-NEXT: beqlr+ 0 ; CHECK-NEXT: # %bb.1: From 5136f4748a2b3302da581f6140ca453bb37f11e9 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Fri, 2 Oct 2020 09:58:36 +0900 Subject: [PATCH 340/544] CodeGen: Fix livein calculation in MachineBasicBlock splitAt Fix and simplify computation of liveins for new block. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D88535 --- llvm/lib/CodeGen/MachineBasicBlock.cpp | 3 ++- llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 8a37a1e9d6f27..6272adcf114d8 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -961,9 +961,10 @@ MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI, if (UpdateLiveIns) { // Make sure we add any physregs we define in the block as liveins to the // new block. + MachineBasicBlock::iterator Prev(&MI); LiveRegs.init(*MF->getSubtarget().getRegisterInfo()); LiveRegs.addLiveOuts(*this); - for (auto I = rbegin(), E = SplitPoint.getReverse(); I != E; ++I) + for (auto I = rbegin(), E = Prev.getReverse(); I != E; ++I) LiveRegs.stepBackward(*I); } diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir index cc86f5b267bb2..57bedf7a7aa5e 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -233,7 +233,6 @@ body: | ; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc ; GCN: bb.3: ; GCN: successors: %bb.2(0x80000000) - ; GCN: liveins: $vgpr3 ; GCN: $vgpr3 = V_MOV_B32_e32 0, implicit $exec ; GCN: $sgpr4_sgpr5 = S_MOV_B64 32 ; GCN: bb.2: From a1e97923a025d09934b557ca4343d8e4b5a9973d Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 1 Oct 2020 18:50:29 -0700 Subject: [PATCH 341/544] Have kernel binary scanner load dSYMs as binary+dSYM if best thing found lldb's PlatforDarwinKernel scans the local filesystem (well known locations, plus user-specified directories) for kernels and kexts when doing kernel debugging, and loads them automatically. Sometimes kernel developers want to debug with *only* a dSYM, in which case they give lldb the DWARF binary + the dSYM as a binary and symbol file. This patch adds code to lldb to do this automatically if that's the best thing lldb can find. A few other bits of cleanup in PlatformDarwinKernel that I undertook at the same time: 1. Remove the 'platform.plugin.darwin-kernel.search-locally-for-kexts' setting. When I added the local filesystem index at start of kernel debugging, I thought people might object to the cost of the search and want a way to disable it. No one has. 2. Change the behavior of 'plugin.dynamic-loader.darwin-kernel.load-kexts' setting so it does not disable the local filesystem scan, or use of the local filesystem binaries. 3. PlatformDarwinKernel::GetSharedModule into GetSharedModuleKext and GetSharedModuleKernel for easier readability & maintenance. 4. Added accounting of .dSYM.yaa files (an archive format akin to tar) that I come across during the scan. I'm not using these for now; it would be very expensive to expand the archives & see if the UUID matches what I'm searching for. Differential Revision: https://reviews.llvm.org/D88632 --- .../DynamicLoaderDarwinKernel.cpp | 6 +- .../Platform/MacOSX/PlatformDarwinKernel.cpp | 424 ++++++++++++------ .../Platform/MacOSX/PlatformDarwinKernel.h | 32 +- .../MacOSX/PlatformMacOSXProperties.td | 4 - 4 files changed, 318 insertions(+), 148 deletions(-) diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp index 68a0335682d3a..d0d5a99b28edc 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp @@ -517,12 +517,8 @@ DynamicLoaderDarwinKernel::DynamicLoaderDarwinKernel(Process *process, Status error; PlatformSP platform_sp( Platform::Create(PlatformDarwinKernel::GetPluginNameStatic(), error)); - // Only select the darwin-kernel Platform if we've been asked to load kexts. - // It can take some time to scan over all of the kext info.plists and that - // shouldn't be done if kext loading is explicitly disabled. - if (platform_sp.get() && GetGlobalProperties()->GetLoadKexts()) { + if (platform_sp.get()) process->GetTarget().SetPlatform(platform_sp); - } } // Destructor diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp index f6c0f262a3798..54f49601e8112 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp @@ -199,13 +199,6 @@ class PlatformDarwinKernelProperties : public Properties { virtual ~PlatformDarwinKernelProperties() {} - bool GetSearchForKexts() const { - const uint32_t idx = ePropertySearchForKexts; - return m_collection_sp->GetPropertyAtIndexAsBoolean( - NULL, idx, - g_platformdarwinkernel_properties[idx].default_uint_value != 0); - } - FileSpecList GetKextDirectories() const { const uint32_t idx = ePropertyKextDirectories; const OptionValueFileSpecList *option_value = @@ -245,14 +238,12 @@ PlatformDarwinKernel::PlatformDarwinKernel( m_name_to_kext_path_map_with_dsyms(), m_name_to_kext_path_map_without_dsyms(), m_search_directories(), m_search_directories_no_recursing(), m_kernel_binaries_with_dsyms(), - m_kernel_binaries_without_dsyms(), - m_ios_debug_session(is_ios_debug_session) + m_kernel_binaries_without_dsyms(), m_kernel_dsyms_no_binaries(), + m_kernel_dsyms_yaas(), m_ios_debug_session(is_ios_debug_session) { - if (GetGlobalProperties()->GetSearchForKexts()) { - CollectKextAndKernelDirectories(); - SearchForKextsAndKernelsRecursively(); - } + CollectKextAndKernelDirectories(); + SearchForKextsAndKernelsRecursively(); } /// Destructor. @@ -293,6 +284,10 @@ void PlatformDarwinKernel::GetStatus(Stream &strm) { (int)m_kernel_binaries_with_dsyms.size()); strm.Printf(" Number of Kernel binaries without dSYMs indexed: %d\n", (int)m_kernel_binaries_without_dsyms.size()); + strm.Printf(" Number of Kernel dSYMs with no binaries indexed: %d\n", + (int)m_kernel_dsyms_no_binaries.size()); + strm.Printf(" Number of Kernel dSYM.yaa's indexed: %d\n", + (int)m_kernel_dsyms_yaas.size()); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM)); if (log) { @@ -305,14 +300,22 @@ void PlatformDarwinKernel::GetStatus(Stream &strm) { for (auto pos : m_name_to_kext_path_map_without_dsyms) { LLDB_LOGF(log, "%s", pos.second.GetPath().c_str()); } - LLDB_LOGF(log, "\nkernels with dSYMS"); + LLDB_LOGF(log, "\nkernel binaries with dSYMS"); for (auto fs : m_kernel_binaries_with_dsyms) { LLDB_LOGF(log, "%s", fs.GetPath().c_str()); } - LLDB_LOGF(log, "\nkernels without dSYMS"); + LLDB_LOGF(log, "\nkernel binaries without dSYMS"); for (auto fs : m_kernel_binaries_without_dsyms) { LLDB_LOGF(log, "%s", fs.GetPath().c_str()); } + LLDB_LOGF(log, "\nkernel dSYMS with no binaries"); + for (auto fs : m_kernel_dsyms_no_binaries) { + LLDB_LOGF(log, "%s", fs.GetPath().c_str()); + } + LLDB_LOGF(log, "\nkernels .dSYM.yaa's"); + for (auto fs : m_kernel_dsyms_yaas) { + LLDB_LOGF(log, "%s", fs.GetPath().c_str()); + } LLDB_LOGF(log, "\n"); } } @@ -497,56 +500,79 @@ PlatformDarwinKernel::GetKernelsAndKextsInDirectoryHelper( file_spec.GetPath().c_str()); PlatformDarwinKernel *thisp = (PlatformDarwinKernel *)baton; + + llvm::StringRef filename = file_spec.GetFilename().GetStringRef(); + bool is_kernel_filename = + filename.startswith("kernel") || filename.startswith("mach"); + bool is_dsym_yaa = filename.endswith(".dSYM.yaa"); + if (ft == llvm::sys::fs::file_type::regular_file || ft == llvm::sys::fs::file_type::symlink_file) { - ConstString filename = file_spec.GetFilename(); - if ((strncmp(filename.GetCString(), "kernel", 6) == 0 || - strncmp(filename.GetCString(), "mach", 4) == 0) && - file_spec_extension != g_dsym_suffix) { - if (KernelHasdSYMSibling(file_spec)) - { - LLDB_LOGF(log, - "PlatformDarwinKernel registering kernel binary '%s' with " - "dSYM sibling", - file_spec.GetPath().c_str()); - thisp->m_kernel_binaries_with_dsyms.push_back(file_spec); + if (is_kernel_filename) { + if (file_spec_extension != g_dsym_suffix && !is_dsym_yaa) { + if (KernelHasdSYMSibling(file_spec)) { + LLDB_LOGF(log, + "PlatformDarwinKernel registering kernel binary '%s' with " + "dSYM sibling", + file_spec.GetPath().c_str()); + thisp->m_kernel_binaries_with_dsyms.push_back(file_spec); + } else { + LLDB_LOGF( + log, + "PlatformDarwinKernel registering kernel binary '%s', no dSYM", + file_spec.GetPath().c_str()); + thisp->m_kernel_binaries_without_dsyms.push_back(file_spec); + } } - else - { - LLDB_LOGF( - log, "PlatformDarwinKernel registering kernel binary '%s', no dSYM", - file_spec.GetPath().c_str()); - thisp->m_kernel_binaries_without_dsyms.push_back(file_spec); + if (is_dsym_yaa) { + LLDB_LOGF(log, "PlatformDarwinKernel registering kernel .dSYM.yaa '%s'", + file_spec.GetPath().c_str()); + thisp->m_kernel_dsyms_yaas.push_back(file_spec); } return FileSystem::eEnumerateDirectoryResultNext; } - } else if (ft == llvm::sys::fs::file_type::directory_file && - file_spec_extension == g_kext_suffix) { - AddKextToMap(thisp, file_spec); - // Look to see if there is a PlugIns subdir with more kexts - FileSpec contents_plugins(file_spec.GetPath() + "/Contents/PlugIns"); - std::string search_here_too; - if (FileSystem::Instance().IsDirectory(contents_plugins)) { - search_here_too = contents_plugins.GetPath(); - } else { - FileSpec plugins(file_spec.GetPath() + "/PlugIns"); - if (FileSystem::Instance().IsDirectory(plugins)) { - search_here_too = plugins.GetPath(); - } - } + } else { + if (ft == llvm::sys::fs::file_type::directory_file) { + if (file_spec_extension == g_kext_suffix) { + AddKextToMap(thisp, file_spec); + // Look to see if there is a PlugIns subdir with more kexts + FileSpec contents_plugins(file_spec.GetPath() + "/Contents/PlugIns"); + std::string search_here_too; + if (FileSystem::Instance().IsDirectory(contents_plugins)) { + search_here_too = contents_plugins.GetPath(); + } else { + FileSpec plugins(file_spec.GetPath() + "/PlugIns"); + if (FileSystem::Instance().IsDirectory(plugins)) { + search_here_too = plugins.GetPath(); + } + } - if (!search_here_too.empty()) { - const bool find_directories = true; - const bool find_files = false; - const bool find_other = false; - FileSystem::Instance().EnumerateDirectory( - search_here_too.c_str(), find_directories, find_files, find_other, - recurse ? GetKernelsAndKextsInDirectoryWithRecursion - : GetKernelsAndKextsInDirectoryNoRecursion, - baton); + if (!search_here_too.empty()) { + const bool find_directories = true; + const bool find_files = false; + const bool find_other = false; + FileSystem::Instance().EnumerateDirectory( + search_here_too.c_str(), find_directories, find_files, find_other, + recurse ? GetKernelsAndKextsInDirectoryWithRecursion + : GetKernelsAndKextsInDirectoryNoRecursion, + baton); + } + return FileSystem::eEnumerateDirectoryResultNext; + } + // Do we have a kernel dSYM with no kernel binary? + if (is_kernel_filename && file_spec_extension == g_dsym_suffix) { + if (KerneldSYMHasNoSiblingBinary(file_spec)) { + LLDB_LOGF(log, + "PlatformDarwinKernel registering kernel dSYM '%s' with " + "no binary sibling", + file_spec.GetPath().c_str()); + thisp->m_kernel_dsyms_no_binaries.push_back(file_spec); + return FileSystem::eEnumerateDirectoryResultNext; + } + } } - return FileSystem::eEnumerateDirectoryResultNext; } + // Don't recurse into dSYM/kext/bundle directories if (recurse && file_spec_extension != g_dsym_suffix && file_spec_extension != g_kext_suffix && @@ -642,6 +668,63 @@ bool PlatformDarwinKernel::KernelHasdSYMSibling(const FileSpec &kernel_binary) { return FileSystem::Instance().IsDirectory(kernel_dsym); } +// Given a FileSpec of /dir/dir/mach.development.t7004.dSYM +// Return true if only the dSYM exists, no binary next to it. +// /dir/dir/mach.development.t7004.dSYM +// but no +// /dir/dir/mach.development.t7004 +bool PlatformDarwinKernel::KerneldSYMHasNoSiblingBinary( + const FileSpec &kernel_dsym) { + static ConstString g_dsym_suffix = ConstString(".dSYM"); + std::string possible_path = kernel_dsym.GetPath(); + if (kernel_dsym.GetFileNameExtension() != g_dsym_suffix) + return false; + + FileSpec binary_filespec = kernel_dsym; + // Chop off the '.dSYM' extension on the filename + binary_filespec.GetFilename() = + binary_filespec.GetFileNameStrippingExtension(); + + // Is there a binary next to this this? Then return false. + if (FileSystem::Instance().Exists(binary_filespec)) + return false; + + // If we have at least one binary in the DWARF subdir, then + // this is a properly formed dSYM and it has no binary next + // to it. + if (GetDWARFBinaryInDSYMBundle(kernel_dsym).size() > 0) + return true; + + return false; +} + +// TODO: This method returns a vector of FileSpec's because a +// dSYM bundle may contain multiple DWARF binaries, but it +// only implements returning the base name binary for now; +// it should iterate over every binary in the DWARF subdir +// and return them all. +std::vector +PlatformDarwinKernel::GetDWARFBinaryInDSYMBundle(FileSpec dsym_bundle) { + std::vector results; + static ConstString g_dsym_suffix = ConstString(".dSYM"); + if (dsym_bundle.GetFileNameExtension() != g_dsym_suffix) { + return results; + } + // Drop the '.dSYM' from the filename + std::string filename = + dsym_bundle.GetFileNameStrippingExtension().GetCString(); + std::string dirname = dsym_bundle.GetDirectory().GetCString(); + + std::string binary_filepath = dsym_bundle.GetPath(); + binary_filepath += "/Contents/Resources/DWARF/"; + binary_filepath += filename; + + FileSpec binary_fspec(binary_filepath); + if (FileSystem::Instance().Exists(binary_fspec)) + results.push_back(binary_fspec); + return results; +} + Status PlatformDarwinKernel::GetSharedModule( const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp, const FileSpecList *module_search_paths_ptr, ModuleSP *old_module_sp_ptr, @@ -653,111 +736,176 @@ Status PlatformDarwinKernel::GetSharedModule( // Treat the file's path as a kext bundle ID (e.g. // "com.apple.driver.AppleIRController") and search our kext index. std::string kext_bundle_id = platform_file.GetPath(); - if (!kext_bundle_id.empty()) { - ConstString kext_bundle_cs(kext_bundle_id.c_str()); - - // First look through the kext bundles that had a dsym next to them - if (m_name_to_kext_path_map_with_dsyms.count(kext_bundle_cs) > 0) { - for (BundleIDToKextIterator it = - m_name_to_kext_path_map_with_dsyms.begin(); - it != m_name_to_kext_path_map_with_dsyms.end(); ++it) { - if (it->first == kext_bundle_cs) { - error = ExamineKextForMatchingUUID(it->second, module_spec.GetUUID(), - module_spec.GetArchitecture(), - module_sp); - if (module_sp.get()) { - return error; - } - } - } - } + if (!kext_bundle_id.empty() && module_spec.GetUUID().IsValid()) { + if (kext_bundle_id == "mach_kernel") { + return GetSharedModuleKernel(module_spec, process, module_sp, + module_search_paths_ptr, old_module_sp_ptr, + did_create_ptr); + } else { + return GetSharedModuleKext(module_spec, process, module_sp, + module_search_paths_ptr, old_module_sp_ptr, + did_create_ptr); + } + } else { // Give the generic methods, including possibly calling into DebugSymbols // framework on macOS systems, a chance. - error = PlatformDarwin::GetSharedModule(module_spec, process, module_sp, + return PlatformDarwin::GetSharedModule(module_spec, process, module_sp, module_search_paths_ptr, old_module_sp_ptr, did_create_ptr); - if (error.Success() && module_sp.get()) { - return error; + } +} + +Status PlatformDarwinKernel::GetSharedModuleKext( + const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp, + const FileSpecList *module_search_paths_ptr, ModuleSP *old_module_sp_ptr, + bool *did_create_ptr) { + Status error; + module_sp.reset(); + const FileSpec &platform_file = module_spec.GetFileSpec(); + + // Treat the file's path as a kext bundle ID (e.g. + // "com.apple.driver.AppleIRController") and search our kext index. + ConstString kext_bundle(platform_file.GetPath().c_str()); + // First look through the kext bundles that had a dsym next to them + if (m_name_to_kext_path_map_with_dsyms.count(kext_bundle) > 0) { + for (BundleIDToKextIterator it = m_name_to_kext_path_map_with_dsyms.begin(); + it != m_name_to_kext_path_map_with_dsyms.end(); ++it) { + if (it->first == kext_bundle) { + error = ExamineKextForMatchingUUID(it->second, module_spec.GetUUID(), + module_spec.GetArchitecture(), + module_sp); + if (module_sp.get()) { + return error; + } + } + } + } + + // Give the generic methods, including possibly calling into DebugSymbols + // framework on macOS systems, a chance. + error = PlatformDarwin::GetSharedModule(module_spec, process, module_sp, + module_search_paths_ptr, + old_module_sp_ptr, did_create_ptr); + if (error.Success() && module_sp.get()) { + return error; + } + + // Lastly, look through the kext binarys without dSYMs + if (m_name_to_kext_path_map_without_dsyms.count(kext_bundle) > 0) { + for (BundleIDToKextIterator it = + m_name_to_kext_path_map_without_dsyms.begin(); + it != m_name_to_kext_path_map_without_dsyms.end(); ++it) { + if (it->first == kext_bundle) { + error = ExamineKextForMatchingUUID(it->second, module_spec.GetUUID(), + module_spec.GetArchitecture(), + module_sp); + if (module_sp.get()) { + return error; + } + } } + } + return error; +} - // Lastly, look through the kext binarys without dSYMs - if (m_name_to_kext_path_map_without_dsyms.count(kext_bundle_cs) > 0) { - for (BundleIDToKextIterator it = - m_name_to_kext_path_map_without_dsyms.begin(); - it != m_name_to_kext_path_map_without_dsyms.end(); ++it) { - if (it->first == kext_bundle_cs) { - error = ExamineKextForMatchingUUID(it->second, module_spec.GetUUID(), - module_spec.GetArchitecture(), - module_sp); - if (module_sp.get()) { +Status PlatformDarwinKernel::GetSharedModuleKernel( + const ModuleSpec &module_spec, Process *process, ModuleSP &module_sp, + const FileSpecList *module_search_paths_ptr, ModuleSP *old_module_sp_ptr, + bool *did_create_ptr) { + Status error; + module_sp.reset(); + + // First try all kernel binaries that have a dSYM next to them + for (auto possible_kernel : m_kernel_binaries_with_dsyms) { + if (FileSystem::Instance().Exists(possible_kernel)) { + ModuleSpec kern_spec(possible_kernel); + kern_spec.GetUUID() = module_spec.GetUUID(); + module_sp.reset(new Module(kern_spec)); + if (module_sp && module_sp->GetObjectFile() && + module_sp->MatchesModuleSpec(kern_spec)) { + // module_sp is an actual kernel binary we want to add. + if (process) { + process->GetTarget().GetImages().AppendIfNeeded(module_sp); + error.Clear(); + return error; + } else { + error = ModuleList::GetSharedModule(kern_spec, module_sp, nullptr, + nullptr, nullptr); + if (module_sp && module_sp->GetObjectFile() && + module_sp->GetObjectFile()->GetType() != + ObjectFile::Type::eTypeCoreFile) { return error; } + module_sp.reset(); } } } } - if (kext_bundle_id == "mach_kernel" && module_spec.GetUUID().IsValid()) { - // First try all kernel binaries that have a dSYM next to them - for (auto possible_kernel : m_kernel_binaries_with_dsyms) { - if (FileSystem::Instance().Exists(possible_kernel)) { - ModuleSpec kern_spec(possible_kernel); - kern_spec.GetUUID() = module_spec.GetUUID(); - ModuleSP module_sp(new Module(kern_spec)); - if (module_sp && module_sp->GetObjectFile() && - module_sp->MatchesModuleSpec(kern_spec)) { - // module_sp is an actual kernel binary we want to add. - if (process) { - process->GetTarget().GetImages().AppendIfNeeded(module_sp); - error.Clear(); + // Next try all dSYMs that have no kernel binary next to them (load + // the kernel DWARF stub as the main binary) + for (auto possible_kernel_dsym : m_kernel_dsyms_no_binaries) { + std::vector objfile_names = + GetDWARFBinaryInDSYMBundle(possible_kernel_dsym); + for (FileSpec objfile : objfile_names) { + ModuleSpec kern_spec(objfile); + kern_spec.GetUUID() = module_spec.GetUUID(); + kern_spec.GetSymbolFileSpec() = possible_kernel_dsym; + + module_sp.reset(new Module(kern_spec)); + if (module_sp && module_sp->GetObjectFile() && + module_sp->MatchesModuleSpec(kern_spec)) { + // module_sp is an actual kernel binary we want to add. + if (process) { + process->GetTarget().GetImages().AppendIfNeeded(module_sp); + error.Clear(); + return error; + } else { + error = ModuleList::GetSharedModule(kern_spec, module_sp, nullptr, + nullptr, nullptr); + if (module_sp && module_sp->GetObjectFile() && + module_sp->GetObjectFile()->GetType() != + ObjectFile::Type::eTypeCoreFile) { return error; - } else { - error = ModuleList::GetSharedModule(kern_spec, module_sp, NULL, - NULL, NULL); - if (module_sp && module_sp->GetObjectFile() && - module_sp->GetObjectFile()->GetType() != - ObjectFile::Type::eTypeCoreFile) { - return error; - } - module_sp.reset(); } + module_sp.reset(); } } } + } - // Give the generic methods, including possibly calling into DebugSymbols - // framework on macOS systems, a chance. - error = PlatformDarwin::GetSharedModule(module_spec, process, module_sp, - module_search_paths_ptr, - old_module_sp_ptr, did_create_ptr); - if (error.Success() && module_sp.get()) { - return error; - } + // Give the generic methods, including possibly calling into DebugSymbols + // framework on macOS systems, a chance. + error = PlatformDarwin::GetSharedModule(module_spec, process, module_sp, + module_search_paths_ptr, + old_module_sp_ptr, did_create_ptr); + if (error.Success() && module_sp.get()) { + return error; + } - // Next try all kernel binaries that don't have a dSYM - for (auto possible_kernel : m_kernel_binaries_without_dsyms) { - if (FileSystem::Instance().Exists(possible_kernel)) { - ModuleSpec kern_spec(possible_kernel); - kern_spec.GetUUID() = module_spec.GetUUID(); - ModuleSP module_sp(new Module(kern_spec)); - if (module_sp && module_sp->GetObjectFile() && - module_sp->MatchesModuleSpec(kern_spec)) { - // module_sp is an actual kernel binary we want to add. - if (process) { - process->GetTarget().GetImages().AppendIfNeeded(module_sp); - error.Clear(); + // Lastly, try all kernel binaries that don't have a dSYM + for (auto possible_kernel : m_kernel_binaries_without_dsyms) { + if (FileSystem::Instance().Exists(possible_kernel)) { + ModuleSpec kern_spec(possible_kernel); + kern_spec.GetUUID() = module_spec.GetUUID(); + module_sp.reset(new Module(kern_spec)); + if (module_sp && module_sp->GetObjectFile() && + module_sp->MatchesModuleSpec(kern_spec)) { + // module_sp is an actual kernel binary we want to add. + if (process) { + process->GetTarget().GetImages().AppendIfNeeded(module_sp); + error.Clear(); + return error; + } else { + error = ModuleList::GetSharedModule(kern_spec, module_sp, nullptr, + nullptr, nullptr); + if (module_sp && module_sp->GetObjectFile() && + module_sp->GetObjectFile()->GetType() != + ObjectFile::Type::eTypeCoreFile) { return error; - } else { - error = ModuleList::GetSharedModule(kern_spec, module_sp, NULL, - NULL, NULL); - if (module_sp && module_sp->GetObjectFile() && - module_sp->GetObjectFile()->GetType() != - ObjectFile::Type::eTypeCoreFile) { - return error; - } - module_sp.reset(); } + module_sp.reset(); } } } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h index 9cf9e41208eb8..cd9e9d70f8ed8 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h @@ -126,7 +126,30 @@ class PlatformDarwinKernel : public PlatformDarwin { // Returns true if there is a .dSYM bundle next to the kernel static bool - KernelHasdSYMSibling(const lldb_private::FileSpec &kext_bundle_filepath); + KernelHasdSYMSibling(const lldb_private::FileSpec &kernel_filepath); + + // Returns true if there is a .dSYM bundle with NO kernel binary next to it + static bool KerneldSYMHasNoSiblingBinary( + const lldb_private::FileSpec &kernel_dsym_filepath); + + // Given a dsym_bundle argument ('.../foo.dSYM'), return a FileSpec + // with the binary inside it ('.../foo.dSYM/Contents/Resources/DWARF/foo'). + // A dSYM bundle may have multiple DWARF binaries in them, so a vector + // of matches is returned. + static std::vector + GetDWARFBinaryInDSYMBundle(lldb_private::FileSpec dsym_bundle); + + lldb_private::Status + GetSharedModuleKext(const lldb_private::ModuleSpec &module_spec, + lldb_private::Process *process, lldb::ModuleSP &module_sp, + const lldb_private::FileSpecList *module_search_paths_ptr, + lldb::ModuleSP *old_module_sp_ptr, bool *did_create_ptr); + + lldb_private::Status GetSharedModuleKernel( + const lldb_private::ModuleSpec &module_spec, + lldb_private::Process *process, lldb::ModuleSP &module_sp, + const lldb_private::FileSpecList *module_search_paths_ptr, + lldb::ModuleSP *old_module_sp_ptr, bool *did_create_ptr); lldb_private::Status ExamineKextForMatchingUUID(const lldb_private::FileSpec &kext_bundle_path, @@ -170,6 +193,13 @@ class PlatformDarwinKernel : public PlatformDarwin { // on local // filesystem, with // dSYMs next to them + KernelBinaryCollection m_kernel_dsyms_no_binaries; // list of kernel + // dsyms with no + // binaries next to + // them + KernelBinaryCollection m_kernel_dsyms_yaas; // list of kernel + // .dSYM.yaa files + lldb_private::LazyBool m_ios_debug_session; PlatformDarwinKernel(const PlatformDarwinKernel &) = delete; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSXProperties.td b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSXProperties.td index 07e4e3e81d8c5..39e9641daae04 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSXProperties.td +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSXProperties.td @@ -1,10 +1,6 @@ include "../../../../include/lldb/Core/PropertiesBase.td" let Definition = "platformdarwinkernel" in { - def SearchForKexts: Property<"search-locally-for-kexts", "Boolean">, - Global, - DefaultTrue, - Desc<"Automatically search for kexts on the local system when doing kernel debugging.">; def KextDirectories: Property<"kext-directories", "FileSpecList">, DefaultStringValue<"">, Desc<"Directories/KDKs to search for kexts in when starting a kernel debug session.">; From 2ef9d21e1a3cf8a58049921c785de1487fbcd7e1 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Fri, 2 Oct 2020 10:52:06 +0900 Subject: [PATCH 342/544] [AMDGPU] SIInsertSkips: Tidy block splitting to use splitAt Convert to use new MachineBasicBlock splitAt function. Place code in splitBlock function for reuse in future changes. Should yield no functional change. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D88537 --- llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 44 +++++++++++------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 052db5f6ea718..9317c185623b4 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -191,6 +191,21 @@ void SIInsertSkips::createEarlyExitBlock(MachineBasicBlock &MBB) { generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII); } +static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, + MachineDominatorTree *MDT) { + MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true); + + // Update dominator tree + using DomTreeT = DomTreeBase; + SmallVector DTUpdates; + for (MachineBasicBlock *Succ : SplitBB->successors()) { + DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ}); + DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ}); + } + DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB}); + MDT->getBase().applyUpdates(DTUpdates); +} + /// Insert an "if exec=0 { null export; s_endpgm }" sequence before the given /// iterator. Only applies to pixel shaders. void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB, @@ -223,33 +238,14 @@ void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB, NextBBI = std::next(MBB.getIterator()); } - auto BranchMI = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) - .addMBB(EarlyExitBlock); + MachineInstr *BranchMI = + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) + .addMBB(EarlyExitBlock); // Split the block if the branch will not come at the end. auto Next = std::next(BranchMI->getIterator()); - if (Next != MBB.end() && !Next->isTerminator()) { - MachineBasicBlock *SplitBB = - MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - MF->insert(NextBBI, SplitBB); - SplitBB->splice(SplitBB->begin(), &MBB, I, MBB.end()); - SplitBB->transferSuccessorsAndUpdatePHIs(&MBB); - // FIXME: the expectation is that this will be used near the beginning - // of a block so just assume all registers are still live. - for (auto LiveIn : MBB.liveins()) - SplitBB->addLiveIn(LiveIn); - MBB.addSuccessor(SplitBB); - - // Update dominator tree - using DomTreeT = DomTreeBase; - SmallVector DTUpdates; - for (MachineBasicBlock *Succ : SplitBB->successors()) { - DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ}); - DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ}); - } - DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB}); - MDT->getBase().applyUpdates(DTUpdates); - } + if (Next != MBB.end() && !Next->isTerminator()) + splitBlock(MBB, *BranchMI, MDT); MBB.addSuccessor(EarlyExitBlock); MDT->getBase().insertEdge(&MBB, EarlyExitBlock); From f29645e7afdbb8d1fc2dd603c0b128bac055625c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 1 Oct 2020 19:17:21 -0700 Subject: [PATCH 343/544] [gvn] Handle a corner case w/vectors of non-integral pointers If we try to coerce a vector of non-integral pointers to a narrower type (either narrower vector or single pointer), we use inttoptr and violate the semantics of non-integral pointers. In theory, we can handle many of these cases, we just need to use a different code idiom to convert without going through inttoptr and back. This shows up as wrong code bugs, and in some cases, crashes due to failed asserts. Modeled after a change which has lived downstream for a couple years, though completely rewritten to be more idiomatic. --- llvm/lib/Transforms/Utils/VNCoercion.cpp | 23 ++++---- .../Transforms/GVN/non-integral-pointers.ll | 52 +++++++++++++++++-- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp index 1939c0e3b504b..11b42eca4960b 100644 --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -17,6 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) { bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, const DataLayout &DL) { Type *StoredTy = StoredVal->getType(); + if (StoredTy == LoadTy) return true; @@ -46,6 +47,14 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, return CI->isNullValue(); return false; } + + + // The implementation below uses inttoptr for vectors of unequal size; we + // can't allow this for non integral pointers. Wecould teach it to extract + // exact subvectors if desired. + if (DL.isNonIntegralPointerType(StoredTy->getScalarType()) && + StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize()) + return false; return true; } @@ -223,14 +232,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, if (isFirstClassAggregateOrScalableType(StoredVal->getType())) return -1; - // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - // Allow casts of zero values to null as a special case - auto *CI = dyn_cast(StoredVal); - if (!CI || !CI->isNullValue()) - return -1; - } + if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL)) + return -1; Value *StorePtr = DepSI->getPointerOperand(); uint64_t StoreSize = @@ -333,9 +336,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) return -1; - // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) + if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL)) return -1; Value *DepPtr = DepLI->getPointerOperand(); diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll index a017dda926e3a..872b6648084e1 100644 --- a/llvm/test/Transforms/GVN/non-integral-pointers.ll +++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll @@ -202,7 +202,7 @@ define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* % ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false) -; CHECK-NEXT: [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]] +; CHECK-NEXT: [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]], align 8 ; CHECK-NEXT: ret i64 addrspace(4)* [[REF]] ; entry: @@ -219,7 +219,7 @@ define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false) -; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc +; CHECK-NEXT: [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8 ; CHECK-NEXT: ret i8 addrspace(4)* [[REF]] ; entry: @@ -266,7 +266,7 @@ define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)* ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false) -; CHECK-NEXT: [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]] +; CHECK-NEXT: [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]], align 32 ; CHECK-NEXT: ret <4 x i64 addrspace(4)*> [[REF]] ; entry: @@ -282,7 +282,7 @@ define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false) -; CHECK-NEXT: [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]] +; CHECK-NEXT: [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]], align 32 ; CHECK-NEXT: ret <4 x i64> [[REF]] ; entry: @@ -386,3 +386,47 @@ entry: %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off ret i8 addrspace(4)* %ref } + + +define void @smaller_vector(i8* %p) { +; CHECK-LABEL: @smaller_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>* +; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[P]] to <2 x i64 addrspace(4)*>* +; CHECK-NEXT: [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32 +; CHECK-NEXT: [[V2:%.*]] = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* [[B]], align 32 +; CHECK-NEXT: call void @use.v2(<2 x i64 addrspace(4)*> [[V2]]) +; CHECK-NEXT: call void @use.v4(<4 x i64 addrspace(4)*> [[V4]]) +; CHECK-NEXT: ret void +; +entry: + %a = bitcast i8* %p to <4 x i64 addrspace(4)*>* + %b = bitcast i8* %p to <2 x i64 addrspace(4)*>* + %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32 + %v2 = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* %b, align 32 + call void @use.v2(<2 x i64 addrspace(4)*> %v2) + call void @use.v4(<4 x i64 addrspace(4)*> %v4) + ret void +} + +define i64 addrspace(4)* @vector_extract(i8* %p) { +; CHECK-LABEL: @vector_extract( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>* +; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[P]] to i64 addrspace(4)** +; CHECK-NEXT: [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32 +; CHECK-NEXT: [[RES:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)** [[B]], align 32 +; CHECK-NEXT: call void @use.v4(<4 x i64 addrspace(4)*> [[V4]]) +; CHECK-NEXT: ret i64 addrspace(4)* [[RES]] +; +entry: + %a = bitcast i8* %p to <4 x i64 addrspace(4)*>* + %b = bitcast i8* %p to i64 addrspace(4)** + %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32 + %res = load i64 addrspace(4)*, i64 addrspace(4)** %b, align 32 + call void @use.v4(<4 x i64 addrspace(4)*> %v4) + ret i64 addrspace(4)* %res +} + +declare void @use.v2(<2 x i64 addrspace(4)*>) +declare void @use.v4(<4 x i64 addrspace(4)*>) From b8ac19cf1cca5faec8b4404bb0f666cb63c9e1de Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 2 Oct 2020 10:20:06 +0700 Subject: [PATCH 344/544] [SCEV] Limited support for unsigned preds in isImpliedViaOperations The logic there only considers `SLT/SGT` predicates. We can use the same logic for proving `ULT/UGT` predicates if all involved values are non-negative. Adding full-scale support for unsigned might be challenging because of code amount, so we can consider this in the future. Differential Revision: https://reviews.llvm.org/D88087 Reviewed By: reames --- llvm/lib/Analysis/ScalarEvolution.cpp | 26 ++++++++++++--- .../Analysis/ScalarEvolutionTest.cpp | 33 +++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index a3e454fefcf0f..70d37cb73fd16 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10236,13 +10236,31 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, // We want to avoid hurting the compile time with analysis of too big trees. if (Depth > MaxSCEVOperationsImplicationDepth) return false; - // We only want to work with ICMP_SGT comparison so far. - // TODO: Extend to ICMP_UGT? - if (Pred == ICmpInst::ICMP_SLT) { - Pred = ICmpInst::ICMP_SGT; + + // We only want to work with GT comparison so far. + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) { + Pred = CmpInst::getSwappedPredicate(Pred); std::swap(LHS, RHS); std::swap(FoundLHS, FoundRHS); } + + // For unsigned, try to reduce it to corresponding signed comparison. + if (Pred == ICmpInst::ICMP_UGT) + // We can replace unsigned predicate with its signed counterpart if all + // involved values are non-negative. + // TODO: We could have better support for unsigned. + if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) { + // Knowing that both FoundLHS and FoundRHS are non-negative, and knowing + // FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us + // use this fact to prove that LHS and RHS are non-negative. + const SCEV *MinusOne = getNegativeSCEV(getOne(LHS->getType())); + if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS, + FoundRHS) && + isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS, + FoundRHS)) + Pred = ICmpInst::ICMP_SGT; + } + if (Pred != ICmpInst::ICMP_SGT) return false; diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index e5ffc21fb6646..be8941838f71a 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1283,4 +1283,37 @@ TEST_F(ScalarEvolutionsTest, ImpliedViaAddRecStart) { }); } +TEST_F(ScalarEvolutionsTest, UnsignedIsImpliedViaOperations) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = + parseAssemblyString("define void @foo(i32* %p1, i32* %p2) { " + "entry: " + " %x = load i32, i32* %p1, !range !0 " + " %cond = icmp ne i32 %x, 0 " + " br i1 %cond, label %guarded, label %exit " + "guarded: " + " %y = add i32 %x, -1 " + " ret void " + "exit: " + " ret void " + "} " + "!0 = !{i32 0, i32 2147483647}", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *X = SE.getSCEV(getInstructionByName(F, "x")); + auto *Y = SE.getSCEV(getInstructionByName(F, "y")); + auto *Guarded = getInstructionByName(F, "y")->getParent(); + ASSERT_TRUE(Guarded); + EXPECT_TRUE( + SE.isBasicBlockEntryGuardedByCond(Guarded, ICmpInst::ICMP_ULT, Y, X)); + EXPECT_TRUE( + SE.isBasicBlockEntryGuardedByCond(Guarded, ICmpInst::ICMP_UGT, X, Y)); + }); +} + } // end namespace llvm From 47df8c57e4ed01fa0101aa0b320fc7cf5a90df28 Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Tue, 29 Sep 2020 17:14:42 -0700 Subject: [PATCH 345/544] [MLIR] Updates around MemRef Normalization The documentation for the NormalizeMemRefs pass and the associated MemRefsNormalizable traits was confusing and not on the website. This update clarifies the language around the difference between a MemRef Type, an operation that accesses the value of MemRef Type, and better documents the limitations of the current implementation. This patch also includes some basic debugging information for the pass so people might have a chance of figuring out why it doesn't work on their code. Differential Revision: https://reviews.llvm.org/D88532 --- mlir/docs/Traits.md | 16 ++-- mlir/include/mlir/IR/OpDefinition.h | 9 +- mlir/include/mlir/Transforms/Passes.td | 110 +++++++++++++++++++++++ mlir/lib/Transforms/NormalizeMemRefs.cpp | 37 ++------ 4 files changed, 130 insertions(+), 42 deletions(-) diff --git a/mlir/docs/Traits.md b/mlir/docs/Traits.md index 3fa56249ae429..488da39e65043 100644 --- a/mlir/docs/Traits.md +++ b/mlir/docs/Traits.md @@ -251,13 +251,15 @@ to have [passes](PassManagement.md) scheduled under them. * `OpTrait::MemRefsNormalizable` -- `MemRefsNormalizable` -This trait is used to flag operations that can accommodate `MemRefs` with -non-identity memory-layout specifications. This trait indicates that the -normalization of memory layout can be performed for such operations. -`MemRefs` normalization consists of replacing an original memory reference -with layout specifications to an equivalent memory reference where -the specified memory layout is applied by rewritting accesses and types -associated with that memory reference. +This trait is used to flag operations that consume or produce +values of `MemRef` type where those references can be 'normalized'. +In cases where an associated `MemRef` has a +non-identity memory-layout specification, such normalizable operations can be +modified so that the `MemRef` has an identity layout specification. +This can be implemented by associating the operation with its own +index expression that can express the equivalent of the memory-layout +specification of the MemRef type. See [the -normalize-memrefs pass]. +(https://mlir.llvm.org/docs/Passes/#-normalize-memrefs-normalize-memrefs) ### Single Block with Implicit Terminator diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index 9f3df4343261e..6861523e0d045 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -1212,13 +1212,8 @@ struct NoRegionArguments : public TraitBase { } }; -/// This trait is used to flag operations that can accommodate MemRefs with -/// non-identity memory-layout specifications. This trait indicates that the -/// normalization of memory layout can be performed for such operations. -/// MemRefs normalization consists of replacing an original memory reference -/// with layout specifications to an equivalent memory reference where the -/// specified memory layout is applied by rewritting accesses and types -/// associated with that memory reference. +// This trait is used to flag operations that consume or produce +// values of `MemRef` type where those references can be 'normalized'. // TODO: Right now, the operands of an operation are either all normalizable, // or not. In the future, we may want to allow some of the operands to be // normalizable. diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td index 3292d5e7dec2d..367e19cfcd55a 100644 --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -313,6 +313,116 @@ def MemRefDataFlowOpt : FunctionPass<"memref-dataflow-opt"> { def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> { let summary = "Normalize memrefs"; + let description = [{ + This pass transforms memref types with a non-trivial + [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into + memref types with an identity layout map, e.g. (i, j) -> (i, j). This + pass is inter-procedural, in the sense that it can modify function + interfaces and call sites that pass memref types. In order to modify + memref types while preserving the original behavior, users of those + memref types are also modified to incorporate the resulting layout map. + For instance, an [AffineLoadOp] + (https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop) + will be updated to compose the layout map with with the affine expression + contained in the op. Operations marked with the [MemRefsNormalizable] + (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are + expected to be normalizable. Supported operations include affine + operations, std.alloc, std.dealloc, and std.return. + + Given an appropriate layout map specified in the code, this transformation + can express tiled or linearized access to multi-dimensional data + structures, but will not modify memref types without an explicit layout + map. + + Currently this pass is limited to only modify + functions where all memref types can be normalized. If a function + contains any operations that are not MemRefNormalizable, then the function + and any functions that call or call it will not be modified. + + Input + + ```mlir + #tile = affine_map<(i) -> (i floordiv 4, i mod 4)> + func @matmul(%A: memref<16xf64, #tile>, + %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) { + affine.for %arg3 = 0 to 16 { + %a = affine.load %A[%arg3] : memref<16xf64, #tile> + %p = mulf %a, %a : f64 + affine.store %p, %A[%arg3] : memref<16xf64, #tile> + } + %c = alloc() : memref<16xf64, #tile> + %d = affine.load %c[0] : memref<16xf64, #tile> + return %A: memref<16xf64, #tile> + } + ``` + + Output + + ```mlir + func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>) + -> memref<4x4xf64> { + affine.for %arg3 = 0 to 16 { + %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64> + %4 = mulf %3, %3 : f64 + affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64> + } + %0 = alloc() : memref<4x4xf64> + %1 = affine.apply #map1() + %2 = affine.load %0[0, 0] : memref<4x4xf64> + return %arg0 : memref<4x4xf64> + } + ``` + + Input + + ``` + #linear8 = affine_map<(i, j) -> (i * 8 + j)> + func @linearize(%arg0: memref<8x8xi32, #linear8>, + %arg1: memref<8x8xi32, #linear8>, + %arg2: memref<8x8xi32, #linear8>) { + %c8 = constant 8 : index + %c0 = constant 0 : index + %c1 = constant 1 : index + affine.for %arg3 = %c0 to %c8 { + affine.for %arg4 = %c0 to %c8 { + affine.for %arg5 = %c0 to %c8 { + %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8> + %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8> + %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8> + %3 = muli %0, %1 : i32 + %4 = addi %2, %3 : i32 + affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8> + } + } + } + return + } + ``` + + Output + + ```mlir + func @linearize(%arg0: memref<64xi32>, + %arg1: memref<64xi32>, + %arg2: memref<64xi32>) { + %c8 = constant 8 : index + %c0 = constant 0 : index + affine.for %arg3 = %c0 to %c8 { + affine.for %arg4 = %c0 to %c8 { + affine.for %arg5 = %c0 to %c8 { + %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32> + %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32> + %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32> + %3 = muli %0, %1 : i32 + %4 = addi %2, %3 : i32 + affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32> + } + } + } + return + } + ``` + }]; let constructor = "mlir::createNormalizeMemRefsPass()"; } diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp index ac02f0e6ba975..44b3ccbd2c3fd 100644 --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -29,34 +29,6 @@ namespace { /// such functions as normalizable. Also, if a normalizable function is known /// to call a non-normalizable function, we treat that function as /// non-normalizable as well. We assume external functions to be normalizable. -/// -/// Input :- -/// #tile = affine_map<(i) -> (i floordiv 4, i mod 4)> -/// func @matmul(%A: memref<16xf64, #tile>, %B: index, %C: memref<16xf64>) -> -/// (memref<16xf64, #tile>) { -/// affine.for %arg3 = 0 to 16 { -/// %a = affine.load %A[%arg3] : memref<16xf64, #tile> -/// %p = mulf %a, %a : f64 -/// affine.store %p, %A[%arg3] : memref<16xf64, #tile> -/// } -/// %c = alloc() : memref<16xf64, #tile> -/// %d = affine.load %c[0] : memref<16xf64, #tile> -/// return %A: memref<16xf64, #tile> -/// } -/// -/// Output :- -/// func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>) -/// -> memref<4x4xf64> { -/// affine.for %arg3 = 0 to 16 { -/// %2 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4] : -/// memref<4x4xf64> %3 = mulf %2, %2 : f64 affine.store %3, %arg0[%arg3 -/// floordiv 4, %arg3 mod 4] : memref<4x4xf64> -/// } -/// %0 = alloc() : memref<16xf64, #map0> -/// %1 = affine.load %0[0] : memref<16xf64, #map0> -/// return %arg0 : memref<4x4xf64> -/// } -/// struct NormalizeMemRefs : public NormalizeMemRefsBase { void runOnOperation() override; void normalizeFuncOpMemRefs(FuncOp funcOp, ModuleOp moduleOp); @@ -73,6 +45,7 @@ std::unique_ptr> mlir::createNormalizeMemRefsPass() { } void NormalizeMemRefs::runOnOperation() { + LLVM_DEBUG(llvm::dbgs() << "Normalizing Memrefs...\n"); ModuleOp moduleOp = getOperation(); // We maintain all normalizable FuncOps in a DenseSet. It is initialized // with all the functions within a module and then functions which are not @@ -92,6 +65,9 @@ void NormalizeMemRefs::runOnOperation() { moduleOp.walk([&](FuncOp funcOp) { if (normalizableFuncs.contains(funcOp)) { if (!areMemRefsNormalizable(funcOp)) { + LLVM_DEBUG(llvm::dbgs() + << "@" << funcOp.getName() + << " contains ops that cannot normalize MemRefs\n"); // Since this function is not normalizable, we set all the caller // functions and the callees of this function as not normalizable. // TODO: Drop this conservative assumption in the future. @@ -101,6 +77,8 @@ void NormalizeMemRefs::runOnOperation() { } }); + LLVM_DEBUG(llvm::dbgs() << "Normalizing " << normalizableFuncs.size() + << " functions\n"); // Those functions which can be normalized are subjected to normalization. for (FuncOp &funcOp : normalizableFuncs) normalizeFuncOpMemRefs(funcOp, moduleOp); @@ -127,6 +105,9 @@ void NormalizeMemRefs::setCalleesAndCallersNonNormalizable( if (!normalizableFuncs.contains(funcOp)) return; + LLVM_DEBUG( + llvm::dbgs() << "@" << funcOp.getName() + << " calls or is called by non-normalizable function\n"); normalizableFuncs.erase(funcOp); // Caller of the function. Optional symbolUses = funcOp.getSymbolUses(moduleOp); From afb4e0f289ac6d020faafda078642a3716629abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 1 Oct 2020 10:21:24 +0300 Subject: [PATCH 346/544] [AArch64] Omit SEH directives for the epilogue if none are needed For these cases, we already omit the prologue directives, if (!AFI->hasStackFrame() && !windowsRequiresStackProbe && !NumBytes). When writing the epilogue (after the prolog has been written), if the function doesn't have the WinCFI flag set (i.e. if no prologue was generated), assume that no epilogue will be needed either, and don't emit any epilog start pseudo instruction. After completing the epilogue, make sure that it actually matched the prologue. Previously, when epilogue start/end was generated, but no prologue, the unwind info for such functions actually was huge; 12 bytes xdata (4 bytes header, 4 bytes for one non-folded epilogue header, 4 bytes for padded opcodes) and 8 bytes pdata. Because the epilog consisted of one opcode (end) but the prolog was empty (no .seh_endprologue), the epilogue couldn't be folded into the prologue, and thus couldn't be considered for packed form either. On a 6.5 MB DLL with 110 KB pdata and 166 KB xdata, this gets rid of 38 KB pdata and 62 KB xdata. Differential Revision: https://reviews.llvm.org/D88641 --- .../Target/AArch64/AArch64FrameLowering.cpp | 20 ++++++++++--------- .../CodeGen/AArch64/lrint-conv-fp16-win.ll | 6 ------ llvm/test/CodeGen/AArch64/lrint-conv-win.ll | 8 -------- .../CodeGen/AArch64/lround-conv-fp16-win.ll | 2 -- llvm/test/CodeGen/AArch64/lround-conv-win.ll | 8 -------- llvm/test/CodeGen/AArch64/powi-windows.ll | 4 ---- llvm/test/CodeGen/AArch64/win64-nocfi.ll | 11 ++++++++++ llvm/test/CodeGen/AArch64/win_cst_pool.ll | 4 ---- 8 files changed, 22 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 868bb247ed5ec..dde2b06a36f05 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1524,10 +1524,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool NeedsWinCFI = needsWinCFI(MF); bool HasWinCFI = false; bool IsFunclet = false; - auto WinCFI = make_scope_exit([&]() { - if (!MF.hasWinCFI()) - MF.setHasWinCFI(HasWinCFI); - }); + auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); }); if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); @@ -1627,7 +1624,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NeedsWinCFI, &HasWinCFI); } - if (NeedsWinCFI) { + if (MF.hasWinCFI()) { + // If the prologue didn't contain any SEH opcodes and didn't set the + // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the + // EpilogStart - to avoid generating CFI for functions that don't need it. + // (And as we didn't generate any prologue at all, it would be assymetrical + // to the epilogue.) By the end of the function, we assert that + // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption. HasWinCFI = true; BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1641,7 +1644,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - if (NeedsWinCFI && HasWinCFI) + if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1720,8 +1723,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, {StackRestoreBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { - if (NeedsWinCFI) { - HasWinCFI = true; + if (HasWinCFI) { BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1767,7 +1769,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, {(int64_t)AfterCSRPopSize, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } - if (NeedsWinCFI && HasWinCFI) + if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); } diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16-win.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16-win.ll index 4299ce89ad184..ec9a8b2be8745 100644 --- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16-win.ll +++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16-win.ll @@ -3,8 +3,6 @@ ; CHECK-LABEL: testmhhs: ; CHECK: frintx h0, h0 ; CHECK-NEXT: fcvtzs w0, h0 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i16 @testmhhs(half %x) { entry: @@ -16,8 +14,6 @@ entry: ; CHECK-LABEL: testmhws: ; CHECK: frintx h0, h0 ; CHECK-NEXT: fcvtzs w0, h0 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i32 @testmhws(half %x) { entry: @@ -29,8 +25,6 @@ entry: ; CHECK: frintx h0, h0 ; CHECK-NEXT: fcvtzs w8, h0 ; CHECK-NEXT: sxtw x0, w8 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i64 @testmhxs(half %x) { entry: diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-win.ll b/llvm/test/CodeGen/AArch64/lrint-conv-win.ll index 8195ffe8a9fd1..490f009c3fbab 100644 --- a/llvm/test/CodeGen/AArch64/lrint-conv-win.ll +++ b/llvm/test/CodeGen/AArch64/lrint-conv-win.ll @@ -4,8 +4,6 @@ ; CHECK: frintx [[SREG:s[0-9]+]], s0 ; CHECK-NEXT: fcvtzs [[WREG:w[0-9]+]], [[SREG]] ; CHECK-NEXT: sxtw x0, [[WREG]] -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i64 @testmsxs(float %x) { entry: @@ -17,8 +15,6 @@ entry: ; CHECK-LABEL: testmsws: ; CHECK: frintx [[SREG:s[0-9]+]], s0 ; CHECK-NEXT: fcvtzs [[WREG:w[0-9]+]], [[SREG]] -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i32 @testmsws(float %x) { entry: @@ -30,8 +26,6 @@ entry: ; CHECK: frintx [[DREG:d[0-9]+]], d0 ; CHECK-NEXT: fcvtzs [[WREG:w[0-9]+]], [[DREG]] ; CHECK-NEXT: sxtw x0, [[WREG]] -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i64 @testmsxd(double %x) { entry: @@ -43,8 +37,6 @@ entry: ; CHECK-LABEL: testmswd: ; CHECK: frintx [[DREG:d[0-9]+]], d0 ; CHECK-NEXT: fcvtzs [[WREG:w[0-9]+]], [[DREG]] -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i32 @testmswd(double %x) { entry: diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16-win.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16-win.ll index ea14659203edd..5eabc2a4f4630 100644 --- a/llvm/test/CodeGen/AArch64/lround-conv-fp16-win.ll +++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16-win.ll @@ -22,8 +22,6 @@ entry: ; CHECK-LABEL: testmhxs: ; CHECK: fcvtas w8, h0 ; CHECK-NEXT: sxtw x0, w8 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i64 @testmhxs(half %x) { entry: diff --git a/llvm/test/CodeGen/AArch64/lround-conv-win.ll b/llvm/test/CodeGen/AArch64/lround-conv-win.ll index b815f2a292498..8bc9213fdcedf 100644 --- a/llvm/test/CodeGen/AArch64/lround-conv-win.ll +++ b/llvm/test/CodeGen/AArch64/lround-conv-win.ll @@ -3,8 +3,6 @@ ; CHECK-LABEL: testmsxs: ; CHECK: fcvtas w8, s0 ; CHECK-NEXT: sxtw x0, w8 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i64 @testmsxs(float %x) { entry: @@ -15,8 +13,6 @@ entry: ; CHECK-LABEL: testmsws: ; CHECK: fcvtas w0, s0 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i32 @testmsws(float %x) { entry: @@ -27,8 +23,6 @@ entry: ; CHECK-LABEL: testmsxd: ; CHECK: fcvtas w8, d0 ; CHECK-NEXT: sxtw x0, w8 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i64 @testmsxd(double %x) { entry: @@ -39,8 +33,6 @@ entry: ; CHECK-LABEL: testmswd: ; CHECK: fcvtas w0, d0 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret define i32 @testmswd(double %x) { entry: diff --git a/llvm/test/CodeGen/AArch64/powi-windows.ll b/llvm/test/CodeGen/AArch64/powi-windows.ll index 809563f3e9e07..859d772b447ad 100644 --- a/llvm/test/CodeGen/AArch64/powi-windows.ll +++ b/llvm/test/CodeGen/AArch64/powi-windows.ll @@ -11,8 +11,6 @@ entry: ; CHECK-LABEL: d: ; CHECK: scvtf d1, w0 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: b pow define float @f(float %f, i32 %i) { @@ -23,8 +21,6 @@ entry: ; CHECK-LABEL: f: ; CHECK: scvtf s1, w0 -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: b powf define float @g(double %d, i32 %i) { diff --git a/llvm/test/CodeGen/AArch64/win64-nocfi.ll b/llvm/test/CodeGen/AArch64/win64-nocfi.ll index a1ca56173a2a2..aae157a81372a 100644 --- a/llvm/test/CodeGen/AArch64/win64-nocfi.ll +++ b/llvm/test/CodeGen/AArch64/win64-nocfi.ll @@ -11,3 +11,14 @@ entry: } declare void @llvm.trap() noreturn nounwind + +define dso_local i32 @getValue() nounwind sspstrong uwtable { +; CHECK-LABEL: getValue +; CHECK-NOT: .seh_proc +; CHECK-NOT: .seh_endprologue +; CHECK-NOT: .seh_startepilogue +; CHECK-NOT: .seh_endepilogue +; CHECK-NOT: .seh_endproc +entry: + ret i32 42 +} diff --git a/llvm/test/CodeGen/AArch64/win_cst_pool.ll b/llvm/test/CodeGen/AArch64/win_cst_pool.ll index 6065b5f344cea..5d9eed408d40f 100644 --- a/llvm/test/CodeGen/AArch64/win_cst_pool.ll +++ b/llvm/test/CodeGen/AArch64/win_cst_pool.ll @@ -12,8 +12,6 @@ define double @double() { ; CHECK: double: ; CHECK: adrp x8, __real@2000000000800001 ; CHECK-NEXT: ldr d0, [x8, __real@2000000000800001] -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret ; MINGW: .section .rdata,"dr" @@ -23,6 +21,4 @@ define double @double() { ; MINGW: double: ; MINGW: adrp x8, [[LABEL]] ; MINGW-NEXT: ldr d0, [x8, [[LABEL]]] -; MINGW-NEXT: .seh_startepilogue -; MINGW-NEXT: .seh_endepilogue ; MINGW-NEXT: ret From b8ce6a67568ba16683a2b1a5e8ebd28d5d537874 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 1 Oct 2020 12:48:07 +0100 Subject: [PATCH 347/544] [SVE][CodeGen] Add new EVT/MVT getFixedSizeInBits() functions When we know that a particular type is always going to be fixed width we have so far been writing code like this: getSizeInBits().getFixedSize() Since we are doing this in quite a few places now it seems to make sense to add a new helper function that allows us to replace these calls with a single getFixedSizeInBits() call. Differential Revision: https://reviews.llvm.org/D88649 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 2 +- llvm/include/llvm/CodeGen/ValueTypes.h | 6 ++++++ llvm/include/llvm/Support/MachineValueType.h | 6 ++++++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 8 ++++---- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 10 +++++----- llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +- llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 2 +- llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp | 2 +- 8 files changed, 25 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index d93e2f9970074..632d112dcd6c5 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -181,7 +181,7 @@ class SDValue { } uint64_t getScalarValueSizeInBits() const { - return getValueType().getScalarType().getSizeInBits().getFixedSize(); + return getValueType().getScalarType().getFixedSizeInBits(); } // Forwarding methods - These forward to the corresponding methods in SDNode. diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index b6f3fabd7f6a5..d409196af8d9d 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -348,6 +348,12 @@ namespace llvm { return getExtendedSizeInBits(); } + /// Return the size of the specified fixed width value type in bits. The + /// function will assert if the type is scalable. + uint64_t getFixedSizeInBits() const { + return getSizeInBits().getFixedSize(); + } + uint64_t getScalarSizeInBits() const { return getScalarType().getSizeInBits().getFixedSize(); } diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 713f847535e88..c9531f3499424 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -923,6 +923,12 @@ namespace llvm { } } + /// Return the size of the specified fixed width value type in bits. The + /// function will assert if the type is scalable. + uint64_t getFixedSizeInBits() const { + return getSizeInBits().getFixedSize(); + } + uint64_t getScalarSizeInBits() const { return getScalarType().getSizeInBits().getFixedSize(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0b3edc3416859..8e14a73e7ea1e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1517,7 +1517,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Store = DAG.getTruncStore( Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT, commonAlignment(SmallestAlign, - EltVT.getSizeInBits().getFixedSize() / 8)); + EltVT.getFixedSizeInBits() / 8)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); @@ -2310,7 +2310,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT, - commonAlignment(SmallestAlign, EltVT.getSizeInBits().getFixedSize() / 8)); + commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8)); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -4904,7 +4904,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - if (RetVT.getSizeInBits().getFixedSize() < MemVTWidth || MemVT == WidenVT) + if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT) return MemVT; } } @@ -5169,7 +5169,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, EVT ValVT = ValOp.getValueType(); TypeSize ValWidth = ValVT.getSizeInBits(); EVT ValEltVT = ValVT.getVectorElementType(); - unsigned ValEltWidth = ValEltVT.getSizeInBits().getFixedSize(); + unsigned ValEltWidth = ValEltVT.getFixedSizeInBits(); assert(StVT.getVectorElementType() == ValEltVT); assert(StVT.isScalableVector() == ValVT.isScalableVector() && "Mismatch between store and value types"); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5c9273150014f..cd49d5bfd98b9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7204,7 +7204,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, "Unaligned store of unknown type."); // Get the half-size VT EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext()); - unsigned NumBits = NewStoredVT.getSizeInBits().getFixedSize(); + unsigned NumBits = NewStoredVT.getFixedSizeInBits(); unsigned IncrementSize = NumBits / 8; // Divide the stored value in two parts. @@ -7262,7 +7262,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); } else if (DataVT.isScalableVector()) { Increment = DAG.getVScale(DL, AddrVT, - APInt(AddrVT.getSizeInBits().getFixedSize(), + APInt(AddrVT.getFixedSizeInBits(), DataVT.getStoreSize().getKnownMinSize())); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); @@ -7281,7 +7281,7 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, unsigned NElts = VecVT.getVectorMinNumElements(); if (VecVT.isScalableVector()) { SDValue VS = DAG.getVScale(dl, IdxVT, - APInt(IdxVT.getSizeInBits().getFixedSize(), + APInt(IdxVT.getFixedSizeInBits(), NElts)); SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS, DAG.getConstant(1, dl, IdxVT)); @@ -7310,8 +7310,8 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, EVT EltVT = VecVT.getVectorElementType(); // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits().getFixedSize() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltVT.getSizeInBits().getFixedSize() && + unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 84b596b498234..ead52b8034592 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -615,7 +615,7 @@ void TargetLoweringBase::initActions() { std::end(TargetDAGCombineArray), 0); for (MVT VT : MVT::fp_valuetypes()) { - MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize()); + MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits()); if (IntVT.isValid()) { setOperationAction(ISD::ATOMIC_SWAP, VT, Promote); AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index fff08db319140..e87ef08d8ed52 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -228,7 +228,7 @@ HexagonTargetLowering::initializeHVXLowering() { for (MVT ElemTy : Subtarget.getHVXElementTypes()) { if (ElemTy == MVT::i1) continue; - int ElemWidth = ElemTy.getSizeInBits().getFixedSize(); + int ElemWidth = ElemTy.getFixedSizeInBits(); int MaxElems = (8*HwLen) / ElemWidth; for (int N = 2; N < MaxElems; N *= 2) { MVT VecTy = MVT::getVectorVT(ElemTy, N); diff --git a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp index fb00a12f4851f..48b950fa74e9f 100644 --- a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp +++ b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp @@ -160,7 +160,7 @@ TEST(ScalableVectorMVTsTest, SizeQueries) { // Check that we can obtain a known-exact size from a non-scalable type. EXPECT_EQ(v4i32.getSizeInBits(), 128U); - EXPECT_EQ(v2i64.getSizeInBits().getFixedSize(), 128U); + EXPECT_EQ(v2i64.getFixedSizeInBits(), 128U); // Check that we can query the known minimum size for both scalable and // fixed length types. From b0ce9f0f4cff7df243b72e308ec863f012724475 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 30 Sep 2020 15:10:03 +0100 Subject: [PATCH 348/544] [SVE][CodeGen] Fix implicit TypeSize->uint64_t casts in TypePromotion The TypePromotion pass only operates on scalar types so I've fixed up all places where we were relying upon the implicit cast from TypeSize->uint64_t. Differential Revision: https://reviews.llvm.org/D88575 --- llvm/lib/CodeGen/TypePromotion.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 807babdcaf250..a42095d8718a3 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -134,8 +134,9 @@ class IRPromoter { Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); - assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() - && "Original type not smaller than extended type"); + assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < + ExtTy->getPrimitiveSizeInBits().getFixedSize() && + "Original type not smaller than extended type"); } void Mutate(); @@ -809,7 +810,7 @@ bool TypePromotion::isLegalToPromote(Value *V) { bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { Type *OrigTy = V->getType(); - TypeSize = OrigTy->getPrimitiveSizeInBits(); + TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize(); SafeToPromote.clear(); SafeWrap.clear(); @@ -980,15 +981,14 @@ bool TypePromotion::runOnFunction(Function &F) { if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != TargetLowering::TypePromoteInteger) break; - EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); - if (RegisterBitWidth < PromotedVT.getSizeInBits()) { + if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " << "for promoted type\n"); break; } - MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits()); + MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits()); break; } } From 542523a61a21c13e7f244bcf821b0fdeb8c6bb24 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 2 Oct 2020 00:28:06 -0700 Subject: [PATCH 349/544] [WebAssembly] Emulate v128.const efficiently v128.const was recently implemented in V8, but until it rolls into Chrome stable, we can't enable it in the WebAssembly backend without breaking origin trial users. So far we have been lowering build_vectors that would otherwise have been lowered to v128.const to splats followed by sequences of replace_lane instructions to initialize each lane individually. That produces large and inefficient code, so this patch introduces new logic to lower integer vector constants to a single i64x2.splat where possible, with at most a single i64x2.replace_lane following it if necessary. Adapted from a patch authored by @omnisip. Differential Revision: https://reviews.llvm.org/D88591 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 69 +++++++++++++++++-- .../CodeGen/WebAssembly/simd-build-vector.ll | 69 +++++++++++++++++-- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 425f8b86c9fbc..8474e50ea42f7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -1565,6 +1566,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, }; } else if (NumConstantLanes >= NumSplatLanes && Subtarget->hasUnimplementedSIMD128()) { + // If we support v128.const, emit it directly SmallVector ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { @@ -1576,11 +1578,67 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } Result = DAG.getBuildVector(VecT, DL, ConstLanes); - IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { return IsConstant(Lane); }; - } - if (!Result) { + } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) { + // Otherwise, if this is an integer vector, pack the lane values together so + // we can construct the 128-bit constant from a pair of i64s using a splat + // followed by at most one i64x2.replace_lane. Also keep track of the lanes + // that actually matter so we can avoid the replace_lane in more cases. + std::array I64s({0, 0}); + std::array ConstLaneMasks({0, 0}); + uint8_t *I64Bytes = reinterpret_cast(I64s.data()); + uint8_t *MaskBytes = reinterpret_cast(ConstLaneMasks.data()); + unsigned I = 0; + size_t ByteStep = VecT.getScalarSizeInBits() / 8; + for (const SDValue &Lane : Op->op_values()) { + if (IsConstant(Lane)) { + using llvm::support::little; + using llvm::support::endian::byte_swap; + // The endianness of the compiler matters here. We want to enforce + // little endianness so that the bytes of a smaller integer type will + // occur first in the uint64_t. + auto *Const = cast(Lane.getNode()); + uint64_t Val = byte_swap(Const->getLimitedValue(), little); + uint8_t *ValPtr = reinterpret_cast(&Val); + std::copy(ValPtr, ValPtr + ByteStep, I64Bytes + I * ByteStep); + uint64_t Mask = uint64_t(-1LL); + uint8_t *MaskPtr = reinterpret_cast(&Mask); + std::copy(MaskPtr, MaskPtr + ByteStep, MaskBytes + I * ByteStep); + } + ++I; + } + // Check whether all constant lanes in the second half of the vector are + // equivalent in the first half or vice versa to determine whether splatting + // either side will be sufficient to materialize the constant. As a special + // case, if the first and second halves have no constant lanes in common, we + // can just combine them. + bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1]; + bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0]; + bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0; + + uint64_t Splatted; + if (SecondHalfSufficient) { + Splatted = I64s[1]; + } else if (CombinedSufficient) { + Splatted = I64s[0] | I64s[1]; + } else { + Splatted = I64s[0]; + } + + Result = DAG.getSplatBuildVector(MVT::v2i64, DL, + DAG.getConstant(Splatted, DL, MVT::i64)); + if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) { + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result, + DAG.getConstant(I64s[1], DL, MVT::i64), + DAG.getConstant(1, DL, MVT::i32)); + } + Result = DAG.getBitcast(VecT, Result); + IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; + } else { // Use a splat, but possibly a load_splat LoadSDNode *SplattedLoad; if ((SplattedLoad = dyn_cast(SplatValue)) && @@ -1593,11 +1651,14 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, } else { Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); } - IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { return Lane == SplatValue; }; } + assert(Result); + assert(IsLaneConstructed); + // Add replace_lane instructions for any unhandled values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index 43cfa97933f84..afd7375d146ae 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -8,12 +8,73 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" +; CHECK-LABEL: emulated_const_trivial_splat: +; CHECK-NEXT: .functype emulated_const_trivial_splat () -> (v128) +; SIMD-VM-NEXT: i64.const $push0=, 8589934593 +; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 +; SIMD-VM-NEXT: return $pop1 +; UNIMP: v128.const +define <4 x i32> @emulated_const_trivial_splat() { + ret <4 x i32> +} + +; CHECK-LABEL: emulated_const_first_sufficient: +; CHECK-NEXT: .functype emulated_const_first_sufficient () -> (v128) +; SIMD-VM-NEXT: i64.const $push0=, 8589934593 +; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 +; SIMD-VM-NEXT: return $pop1 +; UNIMP: v128.const +define <4 x i32> @emulated_const_first_sufficient() { + ret <4 x i32> +} + +; CHECK-LABEL: emulated_const_second_sufficient: +; CHECK-NEXT: .functype emulated_const_second_sufficient () -> (v128) +; SIMD-VM-NEXT: i64.const $push0=, 8589934593 +; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 +; SIMD-VM-NEXT: return $pop1 +; UNIMP: v128.const +define <4 x i32> @emulated_const_second_sufficient() { + ret <4 x i32> +} + +; CHECK-LABEL: emulated_const_combined_sufficient: +; CHECK-NEXT: .functype emulated_const_combined_sufficient () -> (v128) +; SIMD-VM-NEXT: i64.const $push0=, 8589934593 +; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 +; SIMD-VM-NEXT: return $pop1 +; UNIMP: v128.const +define <4 x i32> @emulated_const_combined_sufficient() { + ret <4 x i32> +} + +; CHECK-LABEL: emulated_const_either_sufficient: +; CHECK-NEXT: .functype emulated_const_either_sufficient () -> (v128) +; SIMD-VM-NEXT: i64.const $push0=, 1 +; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 +; SIMD-VM-NEXT: return $pop1 +; UNIMP: v128.const +define <4 x i32> @emulated_const_either_sufficient() { + ret <4 x i32> +} + +; CHECK-LABEL: emulated_const_neither_sufficient: +; CHECK-NEXT: .functype emulated_const_neither_sufficient () -> (v128) +; SIMD-VM-NEXT: i64.const $push0=, 8589934593 +; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 +; SIMD-VM-NEXT: i64.const $push2=, 17179869184 +; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2 +; SIMD-VM-NEXT: return $pop3 +define <4 x i32> @emulated_const_neither_sufficient() { + ret <4 x i32> +} + ; CHECK-LABEL: same_const_one_replaced_i16x8: ; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128) ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42 ; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0 ; UNIMP-NEXT: return $pop[[L1]] -; SIMD-VM: i16x8.splat +; SIMD-VM: i64x2.splat define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) { %v = insertelement <8 x i16> , @@ -27,7 +88,7 @@ define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) { ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8 ; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0 ; UNIMP-NEXT: return $pop[[L1]] -; SIMD-VM: i16x8.splat +; SIMD-VM: i64x2.splat define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) { %v = insertelement <8 x i16> , @@ -68,7 +129,7 @@ define <4 x float> @different_const_one_replaced_f32x4(float %x) { ; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128) ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1 ; UNIMP-NEXT: return $pop[[L0]] -; SIMD-VM: i32x4.splat +; SIMD-VM: i64x2.splat define <4 x i32> @splat_common_const_i32x4() { ret <4 x i32> } @@ -206,7 +267,7 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla ; UNIMP: i8x16.replace_lane ; UNIMP: i8x16.replace_lane ; UNIMP: return -; SIMD-VM: i8x16.splat +; SIMD-VM: i64x2.splat define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) { ; swizzle 0 %m0 = extractelement <16 x i8> %mask, i32 0 From bc18d8d9b705d31a94c51900c8b18e4feaf9c7fb Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Oct 2020 09:53:06 +0200 Subject: [PATCH 350/544] [clangd] Drop dependence on standard library in check.test --- clang-tools-extra/clangd/test/check.test | 27 ++++++++++++++---------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clangd/test/check.test b/clang-tools-extra/clangd/test/check.test index 832629ce29ef8..d83562c4dcf00 100644 --- a/clang-tools-extra/clangd/test/check.test +++ b/clang-tools-extra/clangd/test/check.test @@ -1,13 +1,18 @@ -# RUN: clangd -log=verbose -check 2>&1 | FileCheck -strict-whitespace %s +// RUN: cp %s %t.cpp +// RUN: clangd -log=verbose -check=%t.cpp 2>&1 | FileCheck -strict-whitespace %s -CHECK: Testing on source file {{.*}}test.cc -CHECK: internal (cc1) args are: -cc1 -CHECK: Building preamble... -CHECK: Built preamble -CHECK: Building AST... -CHECK: Testing features at each token -CHECK-DAG: hover: false -CHECK-DAG: hover: true -CHECK-DAG: tweak: AddUsing -CHECK: All checks completed, 0 errors +// CHECK: Testing on source file +// CHECK: internal (cc1) args are: -cc1 +// CHECK: Building preamble... +// CHECK: Built preamble +// CHECK: Building AST... +// CHECK: Testing features at each token +// CHECK-DAG: tweak: ExpandAuto +// CHECK-DAG: hover: true +// CHECK-DAG: tweak: AddUsing +// CHECK: All checks completed, 0 errors +namespace ns { +struct Foo {}; +} // namespace ns +auto f = ns::Foo(); From bfd7ee92ccec2904d98b20b475f48addadc4ec5f Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Fri, 2 Oct 2020 10:22:53 +0200 Subject: [PATCH 351/544] Handle unused variable without asserts --- llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 9317c185623b4..510d7fd8b8d8b 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -211,6 +211,7 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL) { MachineFunction *MF = MBB.getParent(); + (void)MF; assert(MF->getFunction().getCallingConv() == CallingConv::AMDGPU_PS); // It is possible for an SI_KILL_*_TERMINATOR to sit at the bottom of a From 5829dc925002aaf5e80189924e59d238d3d2a4d1 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 1 Oct 2020 16:16:50 +0300 Subject: [PATCH 352/544] [yaml2obj][elf2yaml] - Add a support for the `EntSize` field for `SHT_HASH` sections. Specification for SHT_HASH table says (https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html#hash) that it contains Elf32_Word entries for both 32/64 bit objects. Currently both GNU linkers and LLD sets the `sh_entsize` field to `4`. At the same time, `yaml2obj` ignores the `EntSize` field for SHT_HASH sections. This patch fixes this and also adds a support for obj2yaml: it will not dump this field when the `sh_entsize` contains the default value (`4`). Differential revision: https://reviews.llvm.org/D88652 --- llvm/lib/ObjectYAML/ELFEmitter.cpp | 5 +++ .../test/tools/obj2yaml/ELF/hash-section.yaml | 37 +++++++++++++++++++ .../test/tools/yaml2obj/ELF/hash-section.yaml | 15 ++++++-- .../yaml2obj/ELF/section-headers-exclude.yaml | 2 +- llvm/tools/obj2yaml/elf2yaml.cpp | 2 + 5 files changed, 57 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 04542ccaecfff..10f31555005fb 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1396,6 +1396,11 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, SN2I.lookup(".dynsym", Link)) SHeader.sh_link = Link; + if (Section.EntSize) + SHeader.sh_entsize = *Section.EntSize; + else + SHeader.sh_entsize = sizeof(typename ELFT::Word); + if (Section.Content || Section.Size) { SHeader.sh_size = writeContent(CBA, Section.Content, Section.Size); return; diff --git a/llvm/test/tools/obj2yaml/ELF/hash-section.yaml b/llvm/test/tools/obj2yaml/ELF/hash-section.yaml index 1e867cbf4c98f..7e0a0288a5f12 100644 --- a/llvm/test/tools/obj2yaml/ELF/hash-section.yaml +++ b/llvm/test/tools/obj2yaml/ELF/hash-section.yaml @@ -74,3 +74,40 @@ Sections: - Name: .oversized Type: SHT_HASH Content: '0100000002000000030000000400000000' + +## Check how we dump the "EntSize" field. When the sh_entsize is 4, +## we don't print it, because it is the default value for the SHT_HASH section. + +# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: obj2yaml %t3 | FileCheck %s --check-prefix=ENT-SIZE + +# ENT-SIZE: - Name: .hash.entsize.0 +# ENT-SIZE-NEXT: Type: SHT_HASH +# ENT-SIZE-NEXT: EntSize: 0x0000000000000000 +# ENT-SIZE-NEXT: Content: '' +# ENT-SIZE-NEXT: - Name: .hash.entsize.4.default +# ENT-SIZE-NEXT: Type: SHT_HASH +# ENT-SIZE-NEXT: Content: '' +# ENT-SIZE-NEXT: - Name: .hash.entsize.255 +# ENT-SIZE-NEXT: Type: SHT_HASH +# ENT-SIZE-NEXT: EntSize: 0x00000000000000FF +# ENT-SIZE-NEXT: Content: '' + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .hash.entsize.0 + Type: SHT_HASH + EntSize: 0 + Size: 0 + - Name: .hash.entsize.4.default + Type: SHT_HASH + EntSize: 4 + Size: 0 + - Name: .hash.entsize.255 + Type: SHT_HASH + EntSize: 255 + Size: 0 diff --git a/llvm/test/tools/yaml2obj/ELF/hash-section.yaml b/llvm/test/tools/yaml2obj/ELF/hash-section.yaml index 3c2a2af2df490..35e086a49bd34 100644 --- a/llvm/test/tools/yaml2obj/ELF/hash-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/hash-section.yaml @@ -1,9 +1,11 @@ ## Check how yaml2obj produces SHT_HASH sections. ## Check we can describe a SHT_HASH section using the "Content" tag. +## Check default values of section fields. # RUN: yaml2obj --docnum=1 %s -o %t1 -# RUN: llvm-readobj --sections --section-data %t1 | FileCheck %s --check-prefix=CONTENT +# RUN: llvm-readobj --sections --section-data %t1 | \ +# RUN: FileCheck %s -DENTSIZE=4 --check-prefix=CONTENT # CONTENT: Name: .hash # CONTENT-NEXT: Type: SHT_HASH @@ -15,7 +17,7 @@ # CONTENT-NEXT: Link: 1 # CONTENT-NEXT: Info: 0 # CONTENT-NEXT: AddressAlignment: 0 -# CONTENT-NEXT: EntrySize: 0 +# CONTENT-NEXT: EntrySize: [[ENTSIZE]]{{$}} # CONTENT-NEXT: SectionData ( # CONTENT-NEXT: 0000: 01000000 02000000 03000000 04000000 # CONTENT-NEXT: 0010: 05000000 @@ -33,6 +35,13 @@ Sections: - Name: .hash Type: SHT_HASH Content: '0100000002000000030000000400000005000000' + EntSize: [[ENTSIZE=]] + +## Check we can set an arbitrary entry size for the SHT_HASH section. + +# RUN: yaml2obj --docnum=1 -DENTSIZE=0xFF %s -o %t1.entsize +# RUN: llvm-readobj --sections --section-data %t1.entsize | \ +# RUN: FileCheck %s -DENTSIZE=255 --check-prefix=CONTENT ## Check we can describe a SHT_HASH section using "Bucket" and "Chain" tags. @@ -280,7 +289,7 @@ Sections: # OVERRIDE-NEXT: Link: 0 # OVERRIDE-NEXT: Info: 0 # OVERRIDE-NEXT: AddressAlignment: 0 -# OVERRIDE-NEXT: EntrySize: 0 +# OVERRIDE-NEXT: EntrySize: 4 # OVERRIDE-NEXT: SectionData ( # OVERRIDE-NEXT: 0000: AA000000 BB000000 01000000 02000000 # OVERRIDE-NEXT: 0010: 03000000 04000000 05000000 diff --git a/llvm/test/tools/yaml2obj/ELF/section-headers-exclude.yaml b/llvm/test/tools/yaml2obj/ELF/section-headers-exclude.yaml index b7c37ebcf6a2e..45b339853cf17 100644 --- a/llvm/test/tools/yaml2obj/ELF/section-headers-exclude.yaml +++ b/llvm/test/tools/yaml2obj/ELF/section-headers-exclude.yaml @@ -406,7 +406,7 @@ SectionHeaderTable: # RUN: llvm-readelf %t8 --section-headers | FileCheck %s --check-prefix=LINK-HASH # LINK-HASH: [Nr] Name Type Address Off Size ES Flg Lk Inf Al -# LINK-HASH: [ 1] .hash HASH 0000000000000000 000040 000000 00 0 0 0 +# LINK-HASH: [ 1] .hash HASH 0000000000000000 000040 000000 04 0 0 0 # LINK-HASH-NEXT: [ 2] .gnu_hash GNU_HASH 0000000000000000 000040 000000 00 0 0 0 --- !ELF diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index a8eae03c0b78e..f5ad6a0a44997 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -659,6 +659,8 @@ static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType, return sizeof(typename ELFT::Relr); case ELF::SHT_DYNAMIC: return sizeof(typename ELFT::Dyn); + case ELF::SHT_HASH: + return sizeof(typename ELFT::Word); default: if (SecName == ".debug_str") return 1; From 9573c9f2a363da71b2c07a3add4e52721e6028a0 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Wed, 30 Sep 2020 11:35:00 +0200 Subject: [PATCH 353/544] Fix limit behavior of dynamic alloca When the allocation size is 0, we shouldn't probe. Within [1, PAGE_SIZE], we should probe once etc. This fixes https://bugs.llvm.org/show_bug.cgi?id=47657 Differential Revision: https://reviews.llvm.org/D88548 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8306e3a23f479..935fab44e7c1a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32394,7 +32394,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, BuildMI(testMBB, DL, TII->get(X86::JCC_1)) .addMBB(tailMBB) - .addImm(X86::COND_L); + .addImm(X86::COND_LE); testMBB->addSuccessor(blockMBB); testMBB->addSuccessor(tailMBB); diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll index bc46785640831..82fd67842c8a4 100644 --- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll @@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"} ; CHECK-X86-64-NEXT: andq $-16, %rcx ; CHECK-X86-64-NEXT: subq %rcx, %rax ; CHECK-X86-64-NEXT: cmpq %rsp, %rax -; CHECK-X86-64-NEXT: jl .LBB0_3 +; CHECK-X86-64-NEXT: jle .LBB0_3 ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 ; CHECK-X86-64-NEXT: movq $0, (%rsp) ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X86-64-NEXT: cmpq %rsp, %rax -; CHECK-X86-64-NEXT: jge .LBB0_2 +; CHECK-X86-64-NEXT: jg .LBB0_2 ; CHECK-X86-64-NEXT: .LBB0_3: ; CHECK-X86-64-NEXT: movq %rax, %rsp ; CHECK-X86-64-NEXT: movl $1, 4792(%rax) @@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"} ; CHECK-X86-32-NEXT: andl $-16, %ecx ; CHECK-X86-32-NEXT: subl %ecx, %eax ; CHECK-X86-32-NEXT: cmpl %esp, %eax -; CHECK-X86-32-NEXT: jl .LBB0_3 +; CHECK-X86-32-NEXT: jle .LBB0_3 ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 ; CHECK-X86-32-NEXT: movl $0, (%esp) ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-32-NEXT: cmpl %esp, %eax -; CHECK-X86-32-NEXT: jge .LBB0_2 +; CHECK-X86-32-NEXT: jg .LBB0_2 ; CHECK-X86-32-NEXT: .LBB0_3: ; CHECK-X86-32-NEXT: movl %eax, %esp ; CHECK-X86-32-NEXT: movl $1, 4792(%eax) From 17747d2ec8ec4471748197db54c8703f0c07c91c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 28 Sep 2020 18:12:37 +0200 Subject: [PATCH 354/544] [clangd] Remove Tweak::Intent, use CodeAction kind directly. NFC Intent was a nice idea but it ends up being a bit awkward/heavyweight without adding much. In particular, it makes it hard to implement `CodeActionParams.only` properly (there's an inheritance hierarchy for kinds). Differential Revision: https://reviews.llvm.org/D88427 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 9 +-------- clang-tools-extra/clangd/ClangdServer.cpp | 2 +- clang-tools-extra/clangd/ClangdServer.h | 2 +- clang-tools-extra/clangd/refactor/Tweak.h | 9 +-------- clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp | 4 +++- .../clangd/refactor/tweaks/AnnotateHighlightings.cpp | 4 +++- .../clangd/refactor/tweaks/DefineInline.cpp | 4 +++- .../clangd/refactor/tweaks/DefineOutline.cpp | 4 +++- clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp | 8 ++++---- .../clangd/refactor/tweaks/ExpandAutoType.cpp | 4 +++- clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp | 4 +++- .../clangd/refactor/tweaks/ExtractFunction.cpp | 4 +++- .../clangd/refactor/tweaks/ExtractVariable.cpp | 4 +++- .../clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp | 4 +++- .../clangd/refactor/tweaks/PopulateSwitch.cpp | 4 +++- .../clangd/refactor/tweaks/RawStringLiteral.cpp | 4 +++- .../clangd/refactor/tweaks/RemoveUsingNamespace.cpp | 4 +++- .../clangd/refactor/tweaks/SwapIfBranches.cpp | 4 +++- 18 files changed, 47 insertions(+), 35 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index dfd26ad40b89c..c2915aeada4f8 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -68,14 +68,7 @@ CodeAction toCodeAction(const ClangdServer::TweakRef &T, const URIForFile &File, Range Selection) { CodeAction CA; CA.title = T.Title; - switch (T.Intent) { - case Tweak::Refactor: - CA.kind = std::string(CodeAction::REFACTOR_KIND); - break; - case Tweak::Info: - CA.kind = std::string(CodeAction::INFO_KIND); - break; - } + CA.kind = T.Kind.str(); // This tweak may have an expensive second stage, we only run it if the user // actually chooses it in the UI. We reply with a command that would run the // corresponding tweak. diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 27d1a2dc7cdce..8c73b6a7d0632 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -521,7 +521,7 @@ void ClangdServer::enumerateTweaks(PathRef File, Range Sel, }; for (const auto &Sel : *Selections) { for (auto &T : prepareTweaks(*Sel, Filter)) { - Res.push_back({T->id(), T->title(), T->intent()}); + Res.push_back({T->id(), T->title(), T->kind()}); PreparedTweaks.insert(T->id()); TweakAvailable.record(1, T->id()); } diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index ae10dba32b58c..d801d3cd4353c 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -288,7 +288,7 @@ class ClangdServer { struct TweakRef { std::string ID; /// ID to pass for applyTweak. std::string Title; /// A single-line message to show in the UI. - Tweak::Intent Intent; + llvm::StringLiteral Kind; }; /// Enumerate the code tweaks available to the user at a specified point. void enumerateTweaks(PathRef File, Range Sel, diff --git a/clang-tools-extra/clangd/refactor/Tweak.h b/clang-tools-extra/clangd/refactor/Tweak.h index 10e3e8d3e5653..f991b78d89603 100644 --- a/clang-tools-extra/clangd/refactor/Tweak.h +++ b/clang-tools-extra/clangd/refactor/Tweak.h @@ -67,13 +67,6 @@ class Tweak { // FIXME: provide a way to get sources and ASTs for other files. }; - /// Output of a tweak. - enum Intent { - /// Apply changes that preserve the behavior of the code. - Refactor, - /// Provide information to the user. - Info, - }; struct Effect { /// A message to be displayed to the user. llvm::Optional ShowMessage; @@ -120,7 +113,7 @@ class Tweak { virtual std::string title() const = 0; /// Describes what kind of action this is. /// EXPECTS: prepare() was called and returned true. - virtual Intent intent() const = 0; + virtual llvm::StringLiteral kind() const = 0; /// Is this a 'hidden' tweak, which are off by default. virtual bool hidden() const { return false; } }; diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp index 8c69b64c5affe..fe01894b63864 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp @@ -38,7 +38,9 @@ class AddUsing : public Tweak { bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; std::string title() const override; - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } private: // The qualifier to remove. Set by prepare(). diff --git a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp index 8e3eba35b004d..b243f24eb3696 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp @@ -28,7 +28,9 @@ class AnnotateHighlightings : public Tweak { Expected apply(const Selection &Inputs) override; std::string title() const override { return "Annotate highlighting tokens"; } - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } bool hidden() const override { return true; } }; REGISTER_TWEAK(AnnotateHighlightings) diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp index cdd5f9c6595b0..02be220e0b6c5 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp @@ -394,7 +394,9 @@ class DefineInline : public Tweak { public: const char *id() const override final; - Intent intent() const override { return Intent::Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } std::string title() const override { return "Move function body to declaration"; } diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp index ed4d0cc462692..0462090ee25ae 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp @@ -356,7 +356,9 @@ class DefineOutline : public Tweak { const char *id() const override; bool hidden() const override { return false; } - Intent intent() const override { return Intent::Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } std::string title() const override { return "Move function body to out-of-line."; } diff --git a/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp b/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp index b2b883d645679..72e7cd5a25279 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp @@ -45,7 +45,7 @@ class DumpAST : public Tweak { return std::string( llvm::formatv("Dump {0} AST", Node->getNodeKind().asStringRef())); } - Intent intent() const override { return Info; } + llvm::StringLiteral kind() const override { return CodeAction::INFO_KIND; } bool hidden() const override { return true; } private: @@ -91,7 +91,7 @@ class ShowSelectionTree : public Tweak { return Effect::showMessage(llvm::to_string(Inputs.ASTSelection)); } std::string title() const override { return "Show selection tree"; } - Intent intent() const override { return Info; } + llvm::StringLiteral kind() const override { return CodeAction::INFO_KIND; } bool hidden() const override { return true; } }; REGISTER_TWEAK(ShowSelectionTree) @@ -117,7 +117,7 @@ class DumpSymbol : public Tweak { return Effect::showMessage(Out.str()); } std::string title() const override { return "Dump symbol under the cursor"; } - Intent intent() const override { return Info; } + llvm::StringLiteral kind() const override { return CodeAction::INFO_KIND; } bool hidden() const override { return true; } }; REGISTER_TWEAK(DumpSymbol) @@ -153,7 +153,7 @@ class DumpRecordLayout : public Tweak { "Show {0} layout", TypeWithKeyword::getTagTypeKindName(Record->getTagKind()))); } - Intent intent() const override { return Info; } + llvm::StringLiteral kind() const override { return CodeAction::INFO_KIND; } // FIXME: this is interesting to most users. However: // - triggering is too broad (e.g. triggers on comments within a class) // - showMessage has inconsistent UX (e.g. newlines are stripped in VSCode) diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp index f9db50d934b09..4dfaf729c892b 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp @@ -37,7 +37,9 @@ namespace { class ExpandAutoType : public Tweak { public: const char *id() const final; - Intent intent() const override { return Intent::Refactor;} + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; std::string title() const override; diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp index 59a53f97c49c6..a7e2dddf4cbaf 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp @@ -31,7 +31,9 @@ namespace { class ExpandMacro : public Tweak { public: const char *id() const override final; - Intent intent() const override { return Intent::Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp index 6ee5aee37f51c..1ba8c3c1d9ff1 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp @@ -646,7 +646,9 @@ class ExtractFunction : public Tweak { bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; std::string title() const override { return "Extract to function"; } - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } private: ExtractionZone ExtZone; diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp index 104f8ba63dd04..8b668be5f2f92 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp @@ -438,7 +438,9 @@ class ExtractVariable : public Tweak { std::string title() const override { return "Extract subexpression to variable"; } - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } private: // the expression to extract diff --git a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp index 894f018aa7968..0c50db79d3670 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp @@ -35,7 +35,9 @@ namespace { class ObjCLocalizeStringLiteral : public Tweak { public: const char *id() const override final; - Intent intent() const override { return Intent::Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; diff --git a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp index 753e8b4df8265..12a6e49a16843 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp @@ -52,7 +52,9 @@ class PopulateSwitch : public Tweak { bool prepare(const Selection &Sel) override; Expected apply(const Selection &Sel) override; std::string title() const override { return "Populate switch"; } - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } private: const DeclContext *DeclCtx = nullptr; diff --git a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp index bec45be6c3254..b0ab3067449b0 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp @@ -41,7 +41,9 @@ class RawStringLiteral : public Tweak { bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; std::string title() const override { return "Convert to raw string"; } - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } private: const clang::StringLiteral *Str = nullptr; diff --git a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp index 9d1a9f12567c4..8bd9703397b62 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp @@ -39,7 +39,9 @@ class RemoveUsingNamespace : public Tweak { bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; std::string title() const override; - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } private: const UsingDirectiveDecl *TargetDirective = nullptr; diff --git a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp index d5299f014cc74..976f68215581f 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp @@ -39,7 +39,9 @@ class SwapIfBranches : public Tweak { bool prepare(const Selection &Inputs) override; Expected apply(const Selection &Inputs) override; std::string title() const override { return "Swap if branches"; } - Intent intent() const override { return Refactor; } + llvm::StringLiteral kind() const override { + return CodeAction::REFACTOR_KIND; + } bool hidden() const override { return true; } private: From 067add7b5fd22c879bd2bbf5d55f4fb9b63047bf Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Tue, 28 Jul 2020 14:45:28 +0800 Subject: [PATCH 355/544] [RISCV] Support vmsge.vx and vmsgeu.vx pseudo instructions in RVV. Implement vmsge{u}.vx pseudo instruction. According to RISC-V V specification, there are different scenarios for this pseudo instruction. I list them below. unmasked va >= x pseudoinstruction: vmsge{u}.vx vd, va, x expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd masked va >= x, vd != v0 pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 masked va >= x, vd == v0 pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt Use pseudo instruction to model vmsge{u}.vx. The pseudo instruction will convert to different expansion according to the condition. Differential Revision: https://reviews.llvm.org/D84732 --- .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 100 ++++++++++++++++++ llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 40 +++++++ llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 11 +- llvm/test/MC/RISCV/rvv/compare.s | 58 +++++++++- llvm/test/MC/RISCV/rvv/invalid.s | 8 ++ 5 files changed, 215 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 5898149c9fe15..8e0698966d05f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -61,6 +61,10 @@ class RISCVMCCodeEmitter : public MCCodeEmitter { SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + void expandVMSGE(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + /// TableGen'erated function for getting the binary encoding for an /// instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -188,6 +192,92 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS, support::endian::write(OS, Binary, support::little); } +void RISCVMCCodeEmitter::expandVMSGE(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + MCInst TmpInst; + uint32_t Binary; + unsigned Opcode; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpacted opcode. It should be vmsgeu.vx or vmsge.vx."); + case RISCV::PseudoVMSGEU_VX: + case RISCV::PseudoVMSGEU_VX_M: + case RISCV::PseudoVMSGEU_VX_M_T: + Opcode = RISCV::VMSLTU_VX; + break; + case RISCV::PseudoVMSGE_VX: + case RISCV::PseudoVMSGE_VX_M: + case RISCV::PseudoVMSGE_VX_M_T: + Opcode = RISCV::VMSLT_VX; + break; + } + if (MI.getNumOperands() == 3) { + // unmasked va >= x + // + // pseudoinstruction: vmsge{u}.vx vd, va, x + // expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd + TmpInst = MCInstBuilder(Opcode) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addReg(RISCV::NoRegister); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); + + TmpInst = MCInstBuilder(RISCV::VMNAND_MM) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(0)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); + } else if (MI.getNumOperands() == 4) { + // masked va >= x, vd != v0 + // + // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t + // expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 + assert(MI.getOperand(0).getReg() != RISCV::V0 && + "The destination register should not be V0."); + TmpInst = MCInstBuilder(Opcode) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(3)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); + + TmpInst = MCInstBuilder(RISCV::VMXOR_MM) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(0)) + .addReg(RISCV::V0); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); + } else if (MI.getNumOperands() == 5) { + // masked va >= x, vd == v0 + // + // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt + // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt + assert(MI.getOperand(0).getReg() == RISCV::V0 && + "The destination register should be V0."); + assert(MI.getOperand(1).getReg() != RISCV::V0 && + "The temporary vector register should not be V0."); + TmpInst = MCInstBuilder(Opcode) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(3)) + .addOperand(MI.getOperand(4)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); + + TmpInst = MCInstBuilder(RISCV::VMANDNOT_MM) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); + } +} + void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -216,6 +306,16 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, return; } + if (MI.getOpcode() == RISCV::PseudoVMSGEU_VX || + MI.getOpcode() == RISCV::PseudoVMSGE_VX || + MI.getOpcode() == RISCV::PseudoVMSGEU_VX_M || + MI.getOpcode() == RISCV::PseudoVMSGE_VX_M || + MI.getOpcode() == RISCV::PseudoVMSGEU_VX_M_T || + MI.getOpcode() == RISCV::PseudoVMSGE_VX_M_T) { + expandVMSGE(MI, OS, Fixups, STI); + return; + } + switch (Size) { default: llvm_unreachable("Unhandled encodeInstruction length!"); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 3ac474cb65499..f0c9fcae97112 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -629,6 +629,46 @@ def : InstAlias<"vmsge.vi $vd, $va, $imm$vm", (VMSGT_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm, VMaskOp:$vm), 0>; +let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def PseudoVMSGEU_VX : Pseudo<(outs VRegOp:$vd), + (ins VRegOp:$vs2, GPR:$rs1), + [], "vmsgeu.vx", "$vd, $vs2, $rs1">; +def PseudoVMSGE_VX : Pseudo<(outs VRegOp:$vd), + (ins VRegOp:$vs2, GPR:$rs1), + [], "vmsge.vx", "$vd, $vs2, $rs1">; +def PseudoVMSGEU_VX_M : Pseudo<(outs VRNoV0:$vd), + (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsgeu.vx", "$vd, $vs2, $rs1$vm">; +def PseudoVMSGE_VX_M : Pseudo<(outs VRNoV0:$vd), + (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsge.vx", "$vd, $vs2, $rs1$vm">; +def PseudoVMSGEU_VX_M_T : Pseudo<(outs VMV0:$vd, VRegOp:$scratch), + (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsgeu.vx", "$vd, $vs2, $rs1$vm, $scratch">; +def PseudoVMSGE_VX_M_T : Pseudo<(outs VMV0:$vd, VRegOp:$scratch), + (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsge.vx", "$vd, $vs2, $rs1$vm, $scratch">; +} + +// This apparently unnecessary alias prevents matching `vmsge{u}.vx vd, vs2, vs1` as if +// it were an unmasked (i.e. $vm = RISCV::NoRegister) PseudoVMSGE{U}_VX_M. +def : InstAlias<"vmsgeu.vx $vd, $va, $rs1", + (PseudoVMSGEU_VX VRegOp:$vd, VRegOp:$va, GPR:$rs1), 0>; +def : InstAlias<"vmsge.vx $vd, $va, $rs1", + (PseudoVMSGE_VX VRegOp:$vd, VRegOp:$va, GPR:$rs1), 0>; +def : InstAlias<"vmsgeu.vx v0, $va, $rs1, $vm, $vt", + (PseudoVMSGEU_VX_M_T V0, VRegOp:$vt, VRegOp:$va, GPR:$rs1, + VMaskOp:$vm), 0>; +def : InstAlias<"vmsge.vx v0, $va, $rs1, $vm, $vt", + (PseudoVMSGE_VX_M_T V0, VRegOp:$vt, VRegOp:$va, GPR:$rs1, + VMaskOp:$vm), 0>; +def : InstAlias<"vmsgeu.vx $vd, $va, $rs1, $vm", + (PseudoVMSGEU_VX_M VRNoV0:$vd, VRegOp:$va, GPR:$rs1, + VMaskOp:$vm), 0>; +def : InstAlias<"vmsge.vx $vd, $va, $rs1, $vm", + (PseudoVMSGE_VX_M VRNoV0:$vd, VRegOp:$va, GPR:$rs1, + VMaskOp:$vm), 0>; + // Vector Integer Min/Max Instructions defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>; defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 7544b4b3b8455..2b44847104a02 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -296,7 +296,7 @@ class RegisterTypes reg_types> { // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. def VR : RegisterClass<"RISCV", [nxv8i8, nxv4i16, nxv2i32, nxv1i64], - 64, (add + 64, (add (sequence "V%u", 25, 31), (sequence "V%u", 8, 24), (sequence "V%u", 0, 7) @@ -304,6 +304,15 @@ def VR : RegisterClass<"RISCV", [nxv8i8, nxv4i16, nxv2i32, nxv1i64], let Size = 64; } +def VRNoV0 : RegisterClass<"RISCV", [nxv8i8, nxv4i16, nxv2i32, nxv1i64], + 64, (add + (sequence "V%u", 25, 31), + (sequence "V%u", 8, 24), + (sequence "V%u", 1, 7) + )> { + let Size = 64; +} + def VRM2 : RegisterClass<"RISCV", [nxv16i8, nxv8i16, nxv4i32, nxv2i64], 64, (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2, V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2)> { diff --git a/llvm/test/MC/RISCV/rvv/compare.s b/llvm/test/MC/RISCV/rvv/compare.s index f93aeac1796a7..6b5eb213d0ec2 100644 --- a/llvm/test/MC/RISCV/rvv/compare.s +++ b/llvm/test/MC/RISCV/rvv/compare.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ -# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ @@ -349,3 +349,59 @@ vmsge.vi v8, v4, 16 # CHECK-ENCODING: [0x57,0xb4,0x47,0x7e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 b4 47 7e + +vmsgeu.vx v8, v4, a0 +# CHECK-INST: vmsltu.vx v8, v4, a0 +# CHECK-INST: vmnot.m v8, v8 +# CHECK-ENCODING: [0x57,0x44,0x45,0x6a,0x57,0x24,0x84,0x76] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 44 45 6a +# CHECK-UNKNOWN: 57 24 84 76 + +vmsge.vx v0, v4, a0 +# CHECK-INST: vmslt.vx v0, v4, a0 +# CHECK-INST: vmnot.m v0, v0 +# CHECK-ENCODING: [0x57,0x40,0x45,0x6e,0x57,0x20,0x00,0x76] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 40 45 6e +# CHECK-UNKNOWN: 57 20 00 76 + +vmsge.vx v8, v4, a0 +# CHECK-INST: vmslt.vx v8, v4, a0 +# CHECK-INST: vmnot.m v8, v8 +# CHECK-ENCODING: [0x57,0x44,0x45,0x6e,0x57,0x24,0x84,0x76] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 44 45 6e +# CHECK-UNKNOWN: 57 24 84 76 + +vmsgeu.vx v8, v4, a0, v0.t +# CHECK-INST: vmsltu.vx v8, v4, a0, v0.t +# CHECK-INST: vmxor.mm v8, v8, v0 +# CHECK-ENCODING: [0x57,0x44,0x45,0x68,0x57,0x24,0x80,0x6e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 44 45 68 +# CHECK-UNKNOWN: 57 24 80 6e + +vmsge.vx v8, v4, a0, v0.t +# CHECK-INST: vmslt.vx v8, v4, a0, v0.t +# CHECK-INST: vmxor.mm v8, v8, v0 +# CHECK-ENCODING: [0x57,0x44,0x45,0x6c,0x57,0x24,0x80,0x6e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 44 45 6c +# CHECK-UNKNOWN: 57 24 80 6e + +vmsgeu.vx v0, v4, a0, v0.t, v2 +# CHECK-INST: vmsltu.vx v2, v4, a0, v0.t +# CHECK-INST: vmandnot.mm v0, v0, v2 +# CHECK-ENCODING: [0x57,0x41,0x45,0x68,0x57,0x20,0x01,0x62] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 41 45 68 +# CHECK-UNKNOWN: 57 20 01 62 + +vmsge.vx v0, v4, a0, v0.t, v2 +# CHECK-INST: vmslt.vx v2, v4, a0, v0.t +# CHECK-INST: vmandnot.mm v0, v0, v2 +# CHECK-ENCODING: [0x57,0x41,0x45,0x6c,0x57,0x20,0x01,0x62] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 41 45 6c +# CHECK-UNKNOWN: 57 20 01 62 diff --git a/llvm/test/MC/RISCV/rvv/invalid.s b/llvm/test/MC/RISCV/rvv/invalid.s index 615dc08ad67ce..79b4ea62f6653 100644 --- a/llvm/test/MC/RISCV/rvv/invalid.s +++ b/llvm/test/MC/RISCV/rvv/invalid.s @@ -590,3 +590,11 @@ vadd.vx v0, v2, a0, v0.t vadd.vi v0, v2, 1, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vadd.vi v0, v2, 1, v0.t + +vmsge.vx v0, v4, a0, v0.t +# CHECK-ERROR: too few operands for instruction +# CHECK-ERROR-LABEL: vmsge.vx v0, v4, a0, v0.t + +vmsge.vx v8, v4, a0, v0.t, v2 +# CHECK-ERROR: invalid operand for instruction +# CHECK-ERROR-LABEL: vmsge.vx v8, v4, a0, v0.t, v2 From f7c0e2b8f26fc6573f663f482aa64443ab6a6e71 Mon Sep 17 00:00:00 2001 From: Meera Nakrani Date: Fri, 2 Oct 2020 09:28:35 +0000 Subject: [PATCH 356/544] [ARM] Prevent constants from iCmp instruction from being hoisted if part of a min(max()) pattern Marks constants of an ICmp instruction as free if it's only user is a select instruction that is part of a min(max()) pattern. Ensures that in loops, in particular when loop unrolling is turned on, SSAT will still be correctly generated. Differential Revision: https://reviews.llvm.org/D88662 --- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 8 +- llvm/test/CodeGen/ARM/ssat-unroll-loops.ll | 123 ++++++++++++++++++ 2 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/ssat-unroll-loops.ll diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 892008dce87b2..3eb3bfb52d23d 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -362,8 +362,12 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, // Ensures negative constant of min(max()) or max(min()) patterns that // match to SSAT instructions don't get hoisted if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) && - Ty->getIntegerBitWidth() <= 32 && isSSATMinMaxPattern(Inst, Imm)) - return 0; + Ty->getIntegerBitWidth() <= 32) { + if (isSSATMinMaxPattern(Inst, Imm) || + (isa(Inst) && Inst->hasOneUse() && + isSSATMinMaxPattern(cast(*Inst->user_begin()), Imm))) + return 0; + } return getIntImmCost(Imm, Ty, CostKind); } diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll new file mode 100644 index 0000000000000..f1b4ab2d937d7 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK + +; Checks SSAT is still generated when loop unrolling is on + +define void @ssat_unroll(i16* %pSrcA, i16* %pSrcB, i16* %pDst, i32 %blockSize) { +; CHECK-LABEL: ssat_unroll: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB0_5 +; CHECK-NEXT: @ %bb.1: @ %while.body.preheader +; CHECK-NEXT: sub r12, r3, #1 +; CHECK-NEXT: tst r3, #1 +; CHECK-NEXT: beq .LBB0_3 +; CHECK-NEXT: @ %bb.2: @ %while.body.prol.preheader +; CHECK-NEXT: ldrsh lr, [r0], #2 +; CHECK-NEXT: ldrsh r3, [r1], #2 +; CHECK-NEXT: smulbb r3, r3, lr +; CHECK-NEXT: ssat r3, #16, r3, asr #14 +; CHECK-NEXT: strh r3, [r2], #2 +; CHECK-NEXT: mov r3, r12 +; CHECK-NEXT: .LBB0_3: @ %while.body.prol.loopexit +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: popeq {r11, pc} +; CHECK-NEXT: .LBB0_4: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrsh r12, [r0] +; CHECK-NEXT: subs r3, r3, #2 +; CHECK-NEXT: ldrsh lr, [r1] +; CHECK-NEXT: smulbb r12, lr, r12 +; CHECK-NEXT: ssat r12, #16, r12, asr #14 +; CHECK-NEXT: strh r12, [r2] +; CHECK-NEXT: ldrsh r12, [r0, #2] +; CHECK-NEXT: add r0, r0, #4 +; CHECK-NEXT: ldrsh lr, [r1, #2] +; CHECK-NEXT: add r1, r1, #4 +; CHECK-NEXT: smulbb r12, lr, r12 +; CHECK-NEXT: ssat r12, #16, r12, asr #14 +; CHECK-NEXT: strh r12, [r2, #2] +; CHECK-NEXT: add r2, r2, #4 +; CHECK-NEXT: bne .LBB0_4 +; CHECK-NEXT: .LBB0_5: @ %while.end +; CHECK-NEXT: pop {r11, pc} +entry: + %cmp.not7 = icmp eq i32 %blockSize, 0 + br i1 %cmp.not7, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + %0 = add i32 %blockSize, -1 + %xtraiter = and i32 %blockSize, 1 + %lcmp.mod.not = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader + +while.body.prol.preheader: ; preds = %while.body.preheader + %incdec.ptr.prol = getelementptr inbounds i16, i16* %pSrcA, i32 1 + %1 = load i16, i16* %pSrcA + %conv.prol = sext i16 %1 to i32 + %incdec.ptr1.prol = getelementptr inbounds i16, i16* %pSrcB, i32 1 + %2 = load i16, i16* %pSrcB + %conv2.prol = sext i16 %2 to i32 + %mul.prol = mul nsw i32 %conv2.prol, %conv.prol + %shr.prol = ashr i32 %mul.prol, 14 + %3 = icmp sgt i32 %shr.prol, -32768 + %4 = select i1 %3, i32 %shr.prol, i32 -32768 + %5 = icmp slt i32 %4, 32767 + %spec.select.i.prol = select i1 %5, i32 %4, i32 32767 + %conv3.prol = trunc i32 %spec.select.i.prol to i16 + %incdec.ptr4.prol = getelementptr inbounds i16, i16* %pDst, i32 1 + store i16 %conv3.prol, i16* %pDst + br label %while.body.prol.loopexit + +while.body.prol.loopexit: ; preds = %while.body.prol.preheader, %while.body.preheader + %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ] + %pSrcA.addr.010.unr = phi i16* [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ] + %pDst.addr.09.unr = phi i16* [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ] + %pSrcB.addr.08.unr = phi i16* [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ] + %6 = icmp eq i32 %0, 0 + br i1 %6, label %while.end, label %while.body + +while.body: ; preds = %while.body.prol.loopexit, %while.body + %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ] + %pSrcA.addr.010 = phi i16* [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ] + %pDst.addr.09 = phi i16* [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ] + %pSrcB.addr.08 = phi i16* [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ] + %incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.010, i32 1 + %7 = load i16, i16* %pSrcA.addr.010 + %conv = sext i16 %7 to i32 + %incdec.ptr1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i32 1 + %8 = load i16, i16* %pSrcB.addr.08 + %conv2 = sext i16 %8 to i32 + %mul = mul nsw i32 %conv2, %conv + %shr = ashr i32 %mul, 14 + %9 = icmp sgt i32 %shr, -32768 + %10 = select i1 %9, i32 %shr, i32 -32768 + %11 = icmp slt i32 %10, 32767 + %spec.select.i = select i1 %11, i32 %10, i32 32767 + %conv3 = trunc i32 %spec.select.i to i16 + %incdec.ptr4 = getelementptr inbounds i16, i16* %pDst.addr.09, i32 1 + store i16 %conv3, i16* %pDst.addr.09 + %incdec.ptr.1 = getelementptr inbounds i16, i16* %pSrcA.addr.010, i32 2 + %12 = load i16, i16* %incdec.ptr + %conv.1 = sext i16 %12 to i32 + %incdec.ptr1.1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i32 2 + %13 = load i16, i16* %incdec.ptr1 + %conv2.1 = sext i16 %13 to i32 + %mul.1 = mul nsw i32 %conv2.1, %conv.1 + %shr.1 = ashr i32 %mul.1, 14 + %14 = icmp sgt i32 %shr.1, -32768 + %15 = select i1 %14, i32 %shr.1, i32 -32768 + %16 = icmp slt i32 %15, 32767 + %spec.select.i.1 = select i1 %16, i32 %15, i32 32767 + %conv3.1 = trunc i32 %spec.select.i.1 to i16 + %incdec.ptr4.1 = getelementptr inbounds i16, i16* %pDst.addr.09, i32 2 + store i16 %conv3.1, i16* %incdec.ptr4 + %dec.1 = add i32 %blkCnt.011, -2 + %cmp.not.1 = icmp eq i32 %dec.1, 0 + br i1 %cmp.not.1, label %while.end, label %while.body + +while.end: ; preds = %while.body, %while.body.prol.loopexit, %entry + ret void +} From 670e60c0238bb8e9fb39947017dc3b5459c8ee60 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 10:34:01 +0100 Subject: [PATCH 357/544] [InstCombine] Add partial bswap test from D88578 --- llvm/test/Transforms/InstCombine/bswap.ll | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 8adcb748b96f0..4c6e3dd9a2ff5 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -345,6 +345,28 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { ret i8 %7 } +define i32 @partial_bswap(i32 %x) { +; CHECK-LABEL: @partial_bswap( +; CHECK-NEXT: [[X3:%.*]] = shl i32 [[X:%.*]], 24 +; CHECK-NEXT: [[A2:%.*]] = shl i32 [[X]], 8 +; CHECK-NEXT: [[X2:%.*]] = and i32 [[A2]], 16711680 +; CHECK-NEXT: [[X32:%.*]] = or i32 [[X3]], [[X2]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X]], -65536 +; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[T1]]) +; CHECK-NEXT: [[R:%.*]] = or i32 [[X32]], [[T2]] +; CHECK-NEXT: ret i32 [[R]] +; + %x3 = shl i32 %x, 24 + %a2 = shl i32 %x, 8 + %x2 = and i32 %a2, 16711680 + %x32 = or i32 %x3, %x2 + %t1 = and i32 %x, -65536 + %t2 = call i32 @llvm.bswap.i32(i32 %t1) + %r = or i32 %x32, %t2 + ret i32 %r +} +declare i32 @llvm.bswap.i32(i32) + define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 From e3de249a4c94d6962b36c2b4747c134d152bed37 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 2 Oct 2020 05:32:35 -0400 Subject: [PATCH 358/544] [mlir] Add a subtensor operation This revision introduces a `subtensor` op, which is the counterpart of `subview` for a tensor operand. This also refactors the relevant pieces to allow reusing the `subview` implementation where appropriate. This operation will be used to implement tiling for Linalg on tensors. --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 4 +- .../include/mlir/Dialect/StandardOps/IR/Ops.h | 13 +- .../mlir/Dialect/StandardOps/IR/Ops.td | 403 +++++++++++++----- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 14 +- mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 15 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 9 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 13 +- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 339 ++++++++------- mlir/test/IR/core-ops.mlir | 24 ++ mlir/test/IR/invalid-ops.mlir | 20 + .../lib/Transforms/TestLinalgTransforms.cpp | 3 +- 11 files changed, 561 insertions(+), 296 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 76ce4eb30e7f3..b4e5be58bad73 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -185,7 +185,7 @@ struct ProcInfo { Value nprocs; }; using ProcInfoCallBackFn = std::function( - OpBuilder &b, Location loc, ArrayRef parallelLoopRanges)>; + OpBuilder &b, Location loc, ArrayRef parallelLoopRanges)>; /// Options that allow distribution of loops generated in Linalg transforms to /// processors while generating the loops. @@ -216,7 +216,7 @@ struct GenerateLoopNest { AffineIndexedValue, StdIndexedValue>::type; static void - doit(ArrayRef loopRanges, ValueRange iterArgInitValues, + doit(ArrayRef loopRanges, ValueRange iterArgInitValues, ArrayRef iteratorTypes, function_ref bodyBuilderFn, Optional = None); diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index 2500343c0af38..fbe735e31cff9 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -33,6 +33,17 @@ class Builder; class FuncOp; class OpBuilder; +/// Auxiliary range data structure to unpack the offset, size and stride +/// operands of the SubViewOp / SubTensorOp into a list of triples. +/// Such a list of triple is sometimes more convenient to manipulate. +struct Range { + Value offset; + Value size; + Value stride; +}; + +raw_ostream &operator<<(raw_ostream &os, Range &range); + #define GET_OP_CLASSES #include "mlir/Dialect/StandardOps/IR/Ops.h.inc" @@ -300,8 +311,6 @@ ParseResult parseDimAndSymbolList(OpAsmParser &parser, SmallVectorImpl &operands, unsigned &numDims); -raw_ostream &operator<<(raw_ostream &os, SubViewOp::Range &range); - /// Determines whether MemRefCastOp casts to a more dynamic version of the /// source memref. This is useful to to fold a memref_cast into a consuming op /// and implement canonicalization patterns for ops in different dialects that diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index ff1a82c265614..dbc3e4ca521bf 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2706,11 +2706,214 @@ def SubIOp : IntArithmeticOp<"subi"> { // SubViewOp //===----------------------------------------------------------------------===// -def SubViewOp : Std_Op<"subview", [ - AttrSizedOperandSegments, - DeclareOpInterfaceMethods, - NoSideEffect, - ]> { +class BaseOpWithOffsetSizesAndStrides traits = []> : + Std_Op { + let builders = [ + // Build a SubViewOp with mixed static and dynamic entries. + OpBuilder< + "Value source, ArrayRef staticOffsets, " + "ArrayRef staticSizes, ArrayRef staticStrides, " + "ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}">, + // Build a SubViewOp with all dynamic entries. + OpBuilder< + "Value source, ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}"> + ]; + + code extraBaseClassDeclaration = [{ + /// Returns the number of dynamic offset operands. + int64_t getNumOffsets() { return llvm::size(offsets()); } + + /// Returns the number of dynamic size operands. + int64_t getNumSizes() { return llvm::size(sizes()); } + + /// Returns the number of dynamic stride operands. + int64_t getNumStrides() { return llvm::size(strides()); } + + /// Returns the dynamic sizes for this subview operation if specified. + operand_range getDynamicSizes() { return sizes(); } + + /// Returns in `staticStrides` the static value of the stride + /// operands. Returns failure() if the static value of the stride + /// operands could not be retrieved. + LogicalResult getStaticStrides(SmallVectorImpl &staticStrides) { + if (!strides().empty()) + return failure(); + staticStrides.reserve(static_strides().size()); + for (auto s : static_strides().getAsValueRange()) + staticStrides.push_back(s.getZExtValue()); + return success(); + } + + /// Return the list of Range (i.e. offset, size, stride). Each + /// Range entry contains either the dynamic value or a ConstantIndexOp + /// constructed with `b` at location `loc`. + SmallVector getOrCreateRanges(OpBuilder &b, Location loc); + + /// Return the offsets as Values. Each Value is either the dynamic + /// value specified in the op or a ConstantIndexOp constructed + /// with `b` at location `loc` + SmallVector getOrCreateOffsets(OpBuilder &b, Location loc) { + unsigned dynamicIdx = 1; + return llvm::to_vector<4>(llvm::map_range( + static_offsets().cast(), [&](Attribute a) -> Value { + int64_t staticOffset = a.cast().getInt(); + if (ShapedType::isDynamicStrideOrOffset(staticOffset)) + return getOperand(dynamicIdx++); + else + return b.create( + loc, b.getIndexType(), b.getIndexAttr(staticOffset)); + })); + } + + /// Return the sizes as Values. Each Value is either the dynamic + /// value specified in the op or a ConstantIndexOp constructed + /// with `b` at location `loc` + SmallVector getOrCreateSizes(OpBuilder &b, Location loc) { + unsigned dynamicIdx = 1 + offsets().size(); + return llvm::to_vector<4>(llvm::map_range( + static_sizes().cast(), [&](Attribute a) -> Value { + int64_t staticSize = a.cast().getInt(); + if (ShapedType::isDynamic(staticSize)) + return getOperand(dynamicIdx++); + else + return b.create( + loc, b.getIndexType(), b.getIndexAttr(staticSize)); + })); + } + + /// Return the strides as Values. Each Value is either the dynamic + /// value specified in the op or a ConstantIndexOp constructed with + /// `b` at location `loc` + SmallVector getOrCreateStrides(OpBuilder &b, Location loc) { + unsigned dynamicIdx = 1 + offsets().size() + sizes().size(); + return llvm::to_vector<4>(llvm::map_range( + static_strides().cast(), [&](Attribute a) -> Value { + int64_t staticStride = a.cast().getInt(); + if (ShapedType::isDynamicStrideOrOffset(staticStride)) + return getOperand(dynamicIdx++); + else + return b.create( + loc, b.getIndexType(), b.getIndexAttr(staticStride)); + })); + } + + /// Return the rank of the source ShapedType. + unsigned getSourceRank() { + return source().getType().cast().getRank(); + } + + /// Return the rank of the result ShapedType. + unsigned getResultRank() { return getType().getRank(); } + + /// Return true if the offset `idx` is a static constant. + bool isDynamicOffset(unsigned idx) { + APInt v = *(static_offsets().getAsValueRange().begin() + idx); + return ShapedType::isDynamicStrideOrOffset(v.getSExtValue()); + } + /// Return true if the size `idx` is a static constant. + bool isDynamicSize(unsigned idx) { + APInt v = *(static_sizes().getAsValueRange().begin() + idx); + return ShapedType::isDynamic(v.getSExtValue()); + } + + /// Return true if the stride `idx` is a static constant. + bool isDynamicStride(unsigned idx) { + APInt v = *(static_strides().getAsValueRange().begin() + idx); + return ShapedType::isDynamicStrideOrOffset(v.getSExtValue()); + } + + /// Assert the offset `idx` is a static constant and return its value. + int64_t getStaticOffset(unsigned idx) { + assert(!isDynamicOffset(idx) && "expected static offset"); + APInt v = *(static_offsets().getAsValueRange().begin() + idx); + return v.getSExtValue(); + } + /// Assert the size `idx` is a static constant and return its value. + int64_t getStaticSize(unsigned idx) { + assert(!isDynamicSize(idx) && "expected static size"); + APInt v = *(static_sizes().getAsValueRange().begin() + idx); + return v.getSExtValue(); + } + /// Assert the stride `idx` is a static constant and return its value. + int64_t getStaticStride(unsigned idx) { + assert(!isDynamicStride(idx) && "expected static stride"); + APInt v = *(static_strides().getAsValueRange().begin() + idx); + return v.getSExtValue(); + } + + unsigned getNumDynamicEntriesUpToIdx(ArrayAttr attr, + llvm::function_ref isDynamic, unsigned idx) { + return std::count_if( + attr.getValue().begin(), attr.getValue().begin() + idx, + [&](Attribute attr) { + return isDynamic(attr.cast().getInt()); + }); + } + /// Assert the offset `idx` is dynamic and return the position of the + /// corresponding operand. + unsigned getIndexOfDynamicOffset(unsigned idx) { + assert(isDynamicOffset(idx) && "expected static offset"); + auto numDynamic = + getNumDynamicEntriesUpToIdx(static_offsets().cast(), + ShapedType::isDynamicStrideOrOffset, idx); + return 1 + numDynamic; + } + /// Assert the size `idx` is dynamic and return the position of the + /// corresponding operand. + unsigned getIndexOfDynamicSize(unsigned idx) { + assert(isDynamicSize(idx) && "expected static size"); + auto numDynamic = getNumDynamicEntriesUpToIdx( + static_sizes().cast(), ShapedType::isDynamic, idx); + return 1 + offsets().size() + numDynamic; + } + /// Assert the stride `idx` is dynamic and return the position of the + /// corresponding operand. + unsigned getIndexOfDynamicStride(unsigned idx) { + assert(isDynamicStride(idx) && "expected static stride"); + auto numDynamic = + getNumDynamicEntriesUpToIdx(static_strides().cast(), + ShapedType::isDynamicStrideOrOffset, idx); + return 1 + offsets().size() + sizes().size() + numDynamic; + } + + /// Assert the offset `idx` is dynamic and return its value. + Value getDynamicOffset(unsigned idx) { + return getOperand(getIndexOfDynamicOffset(idx)); + } + /// Assert the size `idx` is dynamic and return its value. + Value getDynamicSize(unsigned idx) { + return getOperand(getIndexOfDynamicSize(idx)); + } + /// Assert the stride `idx` is dynamic and return its value. + Value getDynamicStride(unsigned idx) { + return getOperand(getIndexOfDynamicStride(idx)); + } + + static StringRef getStaticOffsetsAttrName() { + return "static_offsets"; + } + static StringRef getStaticSizesAttrName() { + return "static_sizes"; + } + static StringRef getStaticStridesAttrName() { + return "static_strides"; + } + static ArrayRef getSpecialAttrNames() { + static SmallVector names{ + getStaticOffsetsAttrName(), + getStaticSizesAttrName(), + getStaticStridesAttrName(), + getOperandSegmentSizeAttr()}; + return names; + } + }]; +} + +def SubViewOp : BaseOpWithOffsetSizesAndStrides< + "subview", [DeclareOpInterfaceMethods] > { let summary = "memref subview operation"; let description = [{ The "subview" operation converts a memref type to another memref type @@ -2726,8 +2929,11 @@ def SubViewOp : Std_Op<"subview", [ * Sizes: memref-rank number of dynamic sizes or static integer attributes which specify the sizes of the result "view" memref type. * Strides: memref-rank number of dynamic strides or static integer - attributes multiplicatively to the base memref strides in each - dimension. + attributes that compose multiplicatively with the base memref + strides in each dimension. + + A subview operation may additionally reduce the rank of the resulting view + by removing dimensions that are statically known to be of size 1. Example 1: @@ -2817,6 +3023,15 @@ def SubViewOp : Std_Op<"subview", [ // memref is "inbounds" w.r.t to base memref. It is upto the client // to ensure that the subview is accessed in a manner that is // in-bounds. + + Example 5: + + ``` + // Rank-reducing subview. + %1 = subview %0[0, 0, 0][1, 16, 4][1, 1, 1] : + memref<8x16x4xf32> to memref<16x4xf32> + %3 = subview %2[3, 4, 2][1, 6, 3][1, 1, 1] : + memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> ``` } }]; @@ -2859,137 +3074,97 @@ def SubViewOp : Std_Op<"subview", [ "ArrayRef attrs = {}"> ]; - let extraClassDeclaration = [{ + let extraClassDeclaration = extraBaseClassDeclaration # [{ /// Returns the type of the base memref operand. - MemRefType getBaseMemRefType() { + MemRefType getSourceMemRefType() { return source().getType().cast(); } /// The result of a subview is always a memref. MemRefType getType() { return getResult().getType().cast(); } - /// Returns as integer value the number of offset operands. - int64_t getNumOffsets() { return llvm::size(offsets()); } + /// A subview result type can be fully inferred from the source type and the + /// static representation of offsets, sizes and strides. Special sentinels + /// encode the dynamic case. + static Type inferResultType(MemRefType sourceMemRefType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + }]; - /// Returns as integer value the number of size operands. - int64_t getNumSizes() { return llvm::size(sizes()); } + let hasCanonicalizer = 1; +} - /// Returns as integer value the number of stride operands. - int64_t getNumStrides() { return llvm::size(strides()); } +//===----------------------------------------------------------------------===// +// SubTensorOp +//===----------------------------------------------------------------------===// - /// Returns the dynamic sizes for this subview operation if specified. - operand_range getDynamicSizes() { return sizes(); } +def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor"> { + let summary = "subtensor operation"; + let description = [{ + The "subtensor" operation extract a tensor from another tensor as + specified by the operation's offsets, sizes and strides arguments. - /// Returns in `staticStrides` the static value of the stride - /// operands. Returns failure() if the static value of the stride - /// operands could not be retrieved. - LogicalResult getStaticStrides(SmallVectorImpl &staticStrides); - - /// Auxiliary range data structure and helper function that unpacks the - /// offset, size and stride operands of the SubViewOp into a list of triples. - /// Such a list of triple is sometimes more convenient to manipulate. - struct Range { - Value offset, size, stride; - }; - /// Return the list of SubViewOp::Range (i.e. offset, size, stride). Each - /// Range entry contains either the dynamic value or a ConstantIndexOp - /// constructed with `b` at location `loc`. - SmallVector getOrCreateRanges(OpBuilder &b, Location loc); + The subtensor operation supports the following arguments: - /// Return the offsets as Values. Each Value is either the dynamic - /// value specified in the op or a ConstantIndexOp constructed - /// with `b` at location `loc` - SmallVector getOrCreateOffsets(OpBuilder &b, Location loc); + * tensor: the "base" tensor from which to extract a subtensor. + * offsets: tensor-rank number of dynamic offsets or static integer + attributes into the "base" tensor from which to extract the + subtensor. + * sizes: tensor-rank number of dynamic sizes or static integer attributes + which specify the sizes of the result tensor type. + * strides: tensor-rank number of dynamic strides or static integer + attributes specifying susampling in each dimension. - /// Return the sizes as Values. Each Value is either the dynamic - /// value specified in the op or a ConstantIndexOp constructed - /// with `b` at location `loc` - SmallVector getOrCreateSizes(OpBuilder &b, Location loc); + After buffer-allocation, the "subtensor" op is expected to lower into a + "subview" op. - /// Return the strides as Values. Each Value is either the dynamic - /// value specified in the op or a ConstantIndexOp constructed with - /// `b` at location `loc` - SmallVector getOrCreateStrides(OpBuilder &b, Location loc); + A subtensor operation may additionally reduce the rank of the resulting + tensor by removing dimensions that are statically known to be of size 1. - /// A subview result type can be fully inferred from the source type and the - /// static representation of offsets, sizes and strides. Special sentinels - /// encode the dynamic case. - static Type inferSubViewResultType(MemRefType sourceMemRefType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); + Example: - /// Return the rank of the result MemRefType. - unsigned getRank() { return getType().getRank(); } + ``` + // Rank-reducing subtensor. + %1 = subtensor %0[0, 0, 0][1, 16, 4][1, 1, 1] : + tensor<8x16x4xf32> to tensor<16x4xf32> + %3 = subtensor %2[3, 4, 2][1, 6, 3][1, 1, 1] : + tensor<8x16x4xf32> to tensor<6x3xf32> + ``` + }]; - /// Return true if the offset `idx` is a static constant. - bool isDynamicOffset(unsigned idx); - /// Return true if the size `idx` is a static constant. - bool isDynamicSize(unsigned idx); - /// Return true if the stride `idx` is a static constant. - bool isDynamicStride(unsigned idx); + let arguments = (ins + AnyRankedTensor:$source, + Variadic:$offsets, + Variadic:$sizes, + Variadic:$strides, + I64ArrayAttr:$static_offsets, + I64ArrayAttr:$static_sizes, + I64ArrayAttr:$static_strides + ); + let results = (outs AnyRankedTensor:$result); - /// Assert the offset `idx` is a static constant and return its value. - int64_t getStaticOffset(unsigned idx) { - assert(!isDynamicOffset(idx) && "expected static offset"); - return - static_offsets().cast()[idx].cast().getInt(); - } - /// Assert the size `idx` is a static constant and return its value. - int64_t getStaticSize(unsigned idx) { - assert(!isDynamicSize(idx) && "expected static size"); - return static_sizes().cast()[idx].cast().getInt(); - } - /// Assert the stride `idx` is a static constant and return its value. - int64_t getStaticStride(unsigned idx) { - assert(!isDynamicStride(idx) && "expected static stride"); - return - static_strides().cast()[idx].cast().getInt(); + let extraClassDeclaration = extraBaseClassDeclaration # [{ + /// Returns the type of the base tensor operand. + RankedTensorType getSourceRankedTensorType() { + return source().getType().cast(); } - /// Assert the offset `idx` is dynamic and return the position of the - /// corresponding operand. - unsigned getIndexOfDynamicOffset(unsigned idx); - /// Assert the size `idx` is dynamic and return the position of the - /// corresponding operand. - unsigned getIndexOfDynamicSize(unsigned idx); - /// Assert the stride `idx` is dynamic and return the position of the - /// corresponding operand. - unsigned getIndexOfDynamicStride(unsigned idx); - - /// Assert the offset `idx` is dynamic and return its value. - Value getDynamicOffset(unsigned idx) { - return getOperand(getIndexOfDynamicOffset(idx)); - } - /// Assert the size `idx` is dynamic and return its value. - Value getDynamicSize(unsigned idx) { - return getOperand(getIndexOfDynamicSize(idx)); - } - /// Assert the stride `idx` is dynamic and return its value. - Value getDynamicStride(unsigned idx) { - return getOperand(getIndexOfDynamicStride(idx)); + /// The result of a subtensor is always a tensor. + RankedTensorType getType() { + return getResult().getType().cast(); } - static StringRef getStaticOffsetsAttrName() { - return "static_offsets"; - } - static StringRef getStaticSizesAttrName() { - return "static_sizes"; - } - static StringRef getStaticStridesAttrName() { - return "static_strides"; - } - static ArrayRef getSpecialAttrNames() { - static SmallVector names{ - getStaticOffsetsAttrName(), - getStaticSizesAttrName(), - getStaticStridesAttrName(), - getOperandSegmentSizeAttr()}; - return names; - } + /// A subview result type can be fully inferred from the source type and the + /// static representation of offsets, sizes and strides. Special sentinels + /// encode the dynamic case. + static Type inferResultType(RankedTensorType sourceRankedTensorType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); }]; - let hasCanonicalizer = 1; + // let hasCanonicalizer = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index c964c2466d5c0..7b16a9197f116 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -60,7 +60,7 @@ using llvm::dbgs; // This is achieved by applying the `loopToOperandRangesMaps` permutation maps // to the `loopRanges` in order to obtain view ranges. static LinalgOp cloneWithLoopRanges(OpBuilder &b, Location loc, LinalgOp op, - ArrayRef loopRanges) { + ArrayRef loopRanges) { assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); auto maps = op.indexing_maps(); SmallVector clonedViews; @@ -73,7 +73,7 @@ static LinalgOp cloneWithLoopRanges(OpBuilder &b, Location loc, LinalgOp op, auto map = maps[idx].cast().getValue(); LLVM_DEBUG(dbgs() << "map: " << map << "\n"); Value view = en.value(); - SmallVector viewRanges(map.getNumResults()); + SmallVector viewRanges(map.getNumResults()); for (auto en2 : llvm::enumerate(map.getResults())) { unsigned d = en2.index(); // loopToOperandRangesMaps are permutations-only. @@ -182,7 +182,7 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer, unsigned producerIdx, unsigned nPar = producer.getNumParallelLoops(); unsigned nRed = producer.getNumReductionLoops(); unsigned nWin = producer.getNumWindowLoops(); - SmallVector loopRanges(nPar + nRed + nWin); + SmallVector loopRanges(nPar + nRed + nWin); // Iterate over dimensions identified by the producer map for `producerIdx`. // This defines a subset of the loop ranges that we need to complete later. @@ -202,9 +202,9 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer, unsigned producerIdx, << "existing LoopRange: " << loopRanges[i] << "\n"); else { auto viewDim = getViewDefiningLoopRange(producer, i); - loopRanges[i] = SubViewOp::Range{folded_std_constant_index(folder, 0), - std_dim(viewDim.view, viewDim.dimension), - folded_std_constant_index(folder, 1)}; + loopRanges[i] = Range{folded_std_constant_index(folder, 0), + std_dim(viewDim.view, viewDim.dimension), + folded_std_constant_index(folder, 1)}; LLVM_DEBUG(llvm::dbgs() << "new LoopRange: " << loopRanges[i] << "\n"); } } @@ -300,8 +300,6 @@ static bool isSameSubView(Value a, Value b) { return false; if (sva.getType() != svb.getType()) return false; - if (sva.getRank() != svb.getRank()) - return false; if (sva.getNumOperands() != svb.getNumOperands()) return false; if (sva.static_offsets() != svb.static_offsets()) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index eb452cc40305b..a9e7a86602300 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -65,22 +65,21 @@ static SmallVector permuteIvs(ArrayRef ivs, /// DimExpr or (DimExpr + DimExpr - SymbolExpr floordiv ConstExpr). /// It expects a non-inverted, concatenated map and last values in /// allViewSizes will be applied to the symbols in the map if it contains any. -static SmallVector emitLoopRanges(OpBuilder &b, - Location loc, - AffineMap map, - ValueRange viewSizes) { +static SmallVector emitLoopRanges(OpBuilder &b, Location loc, + AffineMap map, + ValueRange viewSizes) { unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); unsigned numSym = map.getNumSymbols(); assert(viewSizes.size() == numRes + numSym && "viewSizes must contain sizes of all views and values for symbols"); - SmallVector res(numDims); + SmallVector res(numDims); for (unsigned idx = 0; idx < numRes; ++idx) { auto result = map.getResult(idx); if (auto d = result.dyn_cast()) { if (res[d.getPosition()].offset) continue; - res[d.getPosition()] = SubViewOp::Range{ - std_constant_index(0), viewSizes[idx], std_constant_index(1)}; + res[d.getPosition()] = + Range{std_constant_index(0), viewSizes[idx], std_constant_index(1)}; } // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2), @@ -124,7 +123,7 @@ static SmallVector emitLoopRanges(OpBuilder &b, // Construction of the lower bound (s floordiv 2). Value from = applyMapToValues(b, loc, fromMap, values).front(); Value to = applyMapToValues(b, loc, toMap, values).front(); - res[mPos] = SubViewOp::Range{from, to, std_constant_index(1)}; + res[mPos] = Range{from, to, std_constant_index(1)}; } } return res; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 68d69549611cc..3e8e0b74c1459 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -54,7 +54,7 @@ using LoopIndexToRangeIndexMap = DenseMap; // are tiled and for which new loops will be created. Also the function returns // a map from loop indices of the LinalgOp to the corresponding non-empty range // indices of newly created loops. -static std::tuple, LoopIndexToRangeIndexMap> +static std::tuple, LoopIndexToRangeIndexMap> makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map, ArrayRef allViewSizes, ArrayRef allTileSizes) { @@ -76,10 +76,9 @@ makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map, } // Create a new range with the applied tile sizes. - SmallVector res; + SmallVector res; for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) - res.push_back(SubViewOp::Range{std_constant_index(0), viewSizes[idx], - tileSizes[idx]}); + res.push_back(Range{std_constant_index(0), viewSizes[idx], tileSizes[idx]}); return std::make_tuple(res, loopIndexToRangeIndex); } @@ -346,7 +345,7 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, if (!viewSizesToLoopsMap) return llvm::None; - SmallVector loopRanges; + SmallVector loopRanges; LoopIndexToRangeIndexMap loopIndexToRangeIndex; std::tie(loopRanges, loopIndexToRangeIndex) = makeTiledLoopRanges( b, op.getLoc(), viewSizesToLoopsMap, allViewSizes, tileSizes); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 204716b407466..f9ea9092d55dd 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -133,11 +133,10 @@ template struct mlir::linalg::GenerateLoopNest; /// Given a list of subview ranges, extract individual values for lower, upper /// bounds and steps and put them into the corresponding vectors. -static void unpackRanges(ArrayRef ranges, - SmallVectorImpl &lbs, +static void unpackRanges(ArrayRef ranges, SmallVectorImpl &lbs, SmallVectorImpl &ubs, SmallVectorImpl &steps) { - for (SubViewOp::Range range : ranges) { + for (Range range : ranges) { lbs.emplace_back(range.offset); ubs.emplace_back(range.size); steps.emplace_back(range.stride); @@ -194,7 +193,7 @@ getLoopRanges(OpBuilder &builder, LinalgOp linalgOp, OperationFolder *folder) { /// Specialization to build an scf "for" nest. template <> void GenerateLoopNest::doit( - ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef loopRanges, ValueRange iterArgInitValues, ArrayRef iteratorTypes, function_ref bodyBuilderFn, Optional) { @@ -206,7 +205,7 @@ void GenerateLoopNest::doit( /// Specialization to build affine "for" nest. template <> void GenerateLoopNest::doit( - ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef loopRanges, ValueRange iterArgInitValues, ArrayRef iteratorTypes, function_ref bodyBuilderFn, Optional) { @@ -364,7 +363,7 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps, /// Specialization for generating a mix of parallel and sequential scf loops. template <> void GenerateLoopNest::doit( - ArrayRef loopRanges, ValueRange iterArgInitValues, + ArrayRef loopRanges, ValueRange iterArgInitValues, ArrayRef iteratorTypes, function_ref bodyBuilderFn, Optional distributionOptions) { @@ -391,7 +390,7 @@ void GenerateLoopNest::doit( Location loc = edsc::ScopedContext::getLocation(); distributionMethod.assign(distributionOptions->distributionMethod.begin(), distributionOptions->distributionMethod.end()); - SmallVector parallelLoopRanges; + SmallVector parallelLoopRanges; for (auto iteratorType : enumerate(iteratorTypes)) { if (isParallelIteratorType(iteratorType.value())) parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 1cabf172b7fcc..d684a4b98e556 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2587,10 +2587,10 @@ Wrapper operator*(Wrapper a, int64_t b) { /// A subview result type can be fully inferred from the source type and the /// static representation of offsets, sizes and strides. Special sentinels /// encode the dynamic case. -Type SubViewOp::inferSubViewResultType(MemRefType sourceMemRefType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides) { +Type SubViewOp::inferResultType(MemRefType sourceMemRefType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides) { unsigned rank = sourceMemRefType.getRank(); (void)rank; assert(staticOffsets.size() == rank && @@ -2638,7 +2638,8 @@ Type SubViewOp::inferSubViewResultType(MemRefType sourceMemRefType, /// subview ssa-name `[` offset-list `]` `[` size-list `]` `[` stride-list `]` /// `:` strided-memref-type `to` strided-memref-type /// ``` -static void print(OpAsmPrinter &p, SubViewOp op) { +template +static void printOpWithOffsetsSizesAndStrides(OpAsmPrinter &p, OpType op) { int stdDotLen = StandardOpsDialect::getDialectNamespace().size() + 1; p << op.getOperation()->getName().getStringRef().drop_front(stdDotLen) << ' '; p << op.getOperand(0); @@ -2649,16 +2650,22 @@ static void print(OpAsmPrinter &p, SubViewOp op) { printSubViewListOfOperandsOrIntegers(p, op.strides(), op.static_strides(), ShapedType::isDynamicStrideOrOffset); p.printOptionalAttrDict(op.getAttrs(), - /*elidedAttrs=*/{SubViewOp::getSpecialAttrNames()}); + /*elidedAttrs=*/{OpType::getSpecialAttrNames()}); p << " : " << op.getOperand(0).getType() << " to " << op.getType(); } +static void print(OpAsmPrinter &p, SubViewOp op) { + return printOpWithOffsetsSizesAndStrides(p, op); +} + /// Parse SubViewOp of the form: /// ``` -/// subview ssa-name `[` offset-list `]` `[` size-list `]` `[` stride-list `]` +/// `name` ssa-name `[` offset-list `]` `[` size-list `]` `[` stride-list `]` /// `:` strided-memref-type `to` strided-memref-type /// ``` -static ParseResult parseSubViewOp(OpAsmParser &parser, OperationState &result) { +template +static ParseResult parseOpWithOffsetsSizesAndStrides(OpAsmParser &parser, + OperationState &result) { OpAsmParser::OperandType srcInfo; SmallVector offsetsInfo, sizesInfo, stridesInfo; auto indexType = parser.getBuilder().getIndexType(); @@ -2666,13 +2673,13 @@ static ParseResult parseSubViewOp(OpAsmParser &parser, OperationState &result) { if (parser.parseOperand(srcInfo)) return failure(); if (parseListOfOperandsOrIntegers( - parser, result, SubViewOp::getStaticOffsetsAttrName(), + parser, result, OpType::getStaticOffsetsAttrName(), ShapedType::kDynamicStrideOrOffset, offsetsInfo) || parseListOfOperandsOrIntegers(parser, result, - SubViewOp::getStaticSizesAttrName(), + OpType::getStaticSizesAttrName(), ShapedType::kDynamicSize, sizesInfo) || parseListOfOperandsOrIntegers( - parser, result, SubViewOp::getStaticStridesAttrName(), + parser, result, OpType::getStaticStridesAttrName(), ShapedType::kDynamicStrideOrOffset, stridesInfo)) return failure(); @@ -2680,7 +2687,7 @@ static ParseResult parseSubViewOp(OpAsmParser &parser, OperationState &result) { SmallVector segmentSizes{1, static_cast(offsetsInfo.size()), static_cast(sizesInfo.size()), static_cast(stridesInfo.size())}; - result.addAttribute(SubViewOp::getOperandSegmentSizeAttr(), + result.addAttribute(OpType::getOperandSegmentSizeAttr(), b.getI32VectorAttr(segmentSizes)); return failure( @@ -2694,6 +2701,10 @@ static ParseResult parseSubViewOp(OpAsmParser &parser, OperationState &result) { parser.addTypeToList(dstType, result.types)); } +static ParseResult parseSubViewOp(OpAsmParser &parser, OperationState &result) { + return parseOpWithOffsetsSizesAndStrides(parser, result); +} + void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source, ArrayRef staticOffsets, ArrayRef staticSizes, @@ -2701,8 +2712,8 @@ void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source, ValueRange sizes, ValueRange strides, ArrayRef attrs) { auto sourceMemRefType = source.getType().cast(); - auto resultType = inferSubViewResultType(sourceMemRefType, staticOffsets, - staticSizes, staticStrides); + auto resultType = inferResultType(sourceMemRefType, staticOffsets, + staticSizes, staticStrides); build(b, result, resultType, source, offsets, sizes, strides, b.getI64ArrayAttr(staticOffsets), b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); @@ -2760,15 +2771,18 @@ void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, staticStridesVector, offsets, sizes, strides, attrs); } +/// For ViewLikeOpInterface. +Value SubViewOp::getViewSource() { return source(); } + /// Verify that a particular offset/size/stride static attribute is well-formed. -static LogicalResult -verifySubViewOpPart(SubViewOp op, StringRef name, StringRef attrName, - ArrayAttr attr, llvm::function_ref isDynamic, - ValueRange values) { +template +static LogicalResult verifyOpWithOffsetSizesAndStridesPart( + OpType op, StringRef name, StringRef attrName, ArrayAttr attr, + llvm::function_ref isDynamic, ValueRange values) { /// Check static and dynamic offsets/sizes/strides breakdown. - size_t inputRank = op.source().getType().cast().getRank(); - if (attr.size() != inputRank) - return op.emitError("expected ") << inputRank << " " << name << " values"; + if (attr.size() != op.getSourceRank()) + return op.emitError("expected ") + << op.getSourceRank() << " " << name << " values"; unsigned expectedNumDynamicEntries = llvm::count_if(attr.getValue(), [&](Attribute attr) { return isDynamic(attr.cast().getInt()); @@ -2787,17 +2801,26 @@ static SmallVector extractFromI64ArrayAttr(Attribute attr) { })); } -/// Checks if `original` MemRef type can be rank reduced to `reduced` type. +/// Checks if `original` Type type can be rank reduced to `reduced` type. /// This function is slight variant of `is subsequence` algorithm where /// not matching dimension must be 1. static bool isRankReducedType(Type originalType, Type reducedType) { if (originalType == reducedType) return true; + if (!originalType.isa() && !originalType.isa()) + return true; + if (originalType.isa() && + !reducedType.isa()) + return true; + if (originalType.isa() && !reducedType.isa()) + return true; - MemRefType original = originalType.cast(); - MemRefType reduced = reducedType.cast(); - ArrayRef originalShape = original.getShape(); - ArrayRef reducedShape = reduced.getShape(); + ShapedType originalShapedType = originalType.cast(); + ShapedType reducedShapedType = reducedType.cast(); + + // Rank and size logic is valid for all ShapedTypes. + ArrayRef originalShape = originalShapedType.getShape(); + ArrayRef reducedShape = reducedShapedType.getShape(); unsigned originalRank = originalShape.size(), reducedRank = reducedShape.size(); if (reducedRank > originalRank) @@ -2819,6 +2842,13 @@ static bool isRankReducedType(Type originalType, Type reducedType) { if (reducedIdx != reducedRank) return false; + // We are done for the tensor case. + if (originalType.isa()) + return true; + + // Strided layout logic is relevant for MemRefType only. + MemRefType original = originalType.cast(); + MemRefType reduced = reducedType.cast(); MLIRContext *c = original.getContext(); int64_t originalOffset, symCounter = 0, dimCounter = 0; SmallVector originalStrides; @@ -2843,10 +2873,29 @@ static bool isRankReducedType(Type originalType, Type reducedType) { reducedMap == reduced.getAffineMaps().front()); } +template +static LogicalResult verifyOpWithOffsetSizesAndStrides(OpType op) { + // Verify static attributes offsets/sizes/strides. + if (failed(verifyOpWithOffsetSizesAndStridesPart( + op, "offset", op.getStaticOffsetsAttrName(), op.static_offsets(), + ShapedType::isDynamicStrideOrOffset, op.offsets()))) + return failure(); + + if (failed(verifyOpWithOffsetSizesAndStridesPart( + op, "size", op.getStaticSizesAttrName(), op.static_sizes(), + ShapedType::isDynamic, op.sizes()))) + return failure(); + if (failed(verifyOpWithOffsetSizesAndStridesPart( + op, "stride", op.getStaticStridesAttrName(), op.static_strides(), + ShapedType::isDynamicStrideOrOffset, op.strides()))) + return failure(); + return success(); +} + /// Verifier for SubViewOp. static LogicalResult verify(SubViewOp op) { - auto baseType = op.getBaseMemRefType().cast(); - auto subViewType = op.getType(); + MemRefType baseType = op.getSourceMemRefType(); + MemRefType subViewType = op.getType(); // The base memref and the view memref should be in the same memory space. if (baseType.getMemorySpace() != subViewType.getMemorySpace()) @@ -2858,24 +2907,12 @@ static LogicalResult verify(SubViewOp op) { if (!isStrided(baseType)) return op.emitError("base type ") << baseType << " is not strided"; - // Verify static attributes offsets/sizes/strides. - if (failed(verifySubViewOpPart( - op, "offset", op.getStaticOffsetsAttrName(), op.static_offsets(), - ShapedType::isDynamicStrideOrOffset, op.offsets()))) - return failure(); - - if (failed(verifySubViewOpPart(op, "size", op.getStaticSizesAttrName(), - op.static_sizes(), ShapedType::isDynamic, - op.sizes()))) - return failure(); - if (failed(verifySubViewOpPart( - op, "stride", op.getStaticStridesAttrName(), op.static_strides(), - ShapedType::isDynamicStrideOrOffset, op.strides()))) + if (failed(verifyOpWithOffsetSizesAndStrides(op))) return failure(); // Verify result type against inferred type. - auto expectedType = SubViewOp::inferSubViewResultType( - op.getBaseMemRefType(), extractFromI64ArrayAttr(op.static_offsets()), + auto expectedType = SubViewOp::inferResultType( + baseType, extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); if (!isRankReducedType(expectedType, subViewType)) @@ -2885,123 +2922,41 @@ static LogicalResult verify(SubViewOp op) { return success(); } -raw_ostream &mlir::operator<<(raw_ostream &os, SubViewOp::Range &range) { +raw_ostream &mlir::operator<<(raw_ostream &os, Range &range) { return os << "range " << range.offset << ":" << range.size << ":" << range.stride; } -static unsigned getNumDynamicEntriesUpToIdx( - ArrayAttr attr, llvm::function_ref isDynamic, unsigned idx) { - return std::count_if(attr.getValue().begin(), attr.getValue().begin() + idx, - [&](Attribute attr) { - return isDynamic(attr.cast().getInt()); - }); -} - -bool SubViewOp::isDynamicOffset(unsigned idx) { - return ShapedType::isDynamicStrideOrOffset( - extractFromI64ArrayAttr(static_offsets())[idx]); -} -bool SubViewOp::isDynamicSize(unsigned idx) { - return ShapedType::isDynamic(extractFromI64ArrayAttr(static_sizes())[idx]); -} -bool SubViewOp::isDynamicStride(unsigned idx) { - return ShapedType::isDynamicStrideOrOffset( - extractFromI64ArrayAttr(static_strides())[idx]); -} - -unsigned SubViewOp::getIndexOfDynamicOffset(unsigned idx) { - assert(isDynamicOffset(idx) && "expected static offset"); - auto numDynamic = - getNumDynamicEntriesUpToIdx(static_offsets().cast(), - ShapedType::isDynamicStrideOrOffset, idx); - return 1 + numDynamic; -} -unsigned SubViewOp::getIndexOfDynamicSize(unsigned idx) { - assert(isDynamicSize(idx) && "expected static size"); - auto numDynamic = getNumDynamicEntriesUpToIdx( - static_sizes().cast(), ShapedType::isDynamic, idx); - return 1 + offsets().size() + numDynamic; -} -unsigned SubViewOp::getIndexOfDynamicStride(unsigned idx) { - assert(isDynamicStride(idx) && "expected static stride"); - auto numDynamic = - getNumDynamicEntriesUpToIdx(static_strides().cast(), - ShapedType::isDynamicStrideOrOffset, idx); - return 1 + offsets().size() + sizes().size() + numDynamic; -} - -/// Return the list of SubViewOp::Range (i.e. offset, size, stride). Each Range +/// Return the list of Range (i.e. offset, size, stride). Each Range /// entry contains either the dynamic value or a ConstantIndexOp constructed /// with `b` at location `loc`. -SmallVector SubViewOp::getOrCreateRanges(OpBuilder &b, - Location loc) { +template +static SmallVector getOrCreateRangesImpl(OpType op, OpBuilder &b, + Location loc) { SmallVector res; - unsigned rank = getType().getRank(); + unsigned rank = op.getSourceRank(); res.reserve(rank); for (unsigned idx = 0; idx < rank; ++idx) { - auto offset = isDynamicOffset(idx) - ? getDynamicOffset(idx) - : b.create(loc, getStaticOffset(idx)); - auto size = isDynamicSize(idx) - ? getDynamicSize(idx) - : b.create(loc, getStaticSize(idx)); - auto stride = isDynamicStride(idx) - ? getDynamicStride(idx) - : b.create(loc, getStaticStride(idx)); + Value offset = + op.isDynamicOffset(idx) + ? op.getDynamicOffset(idx) + : b.create(loc, op.getStaticOffset(idx)); + Value size = op.isDynamicSize(idx) + ? op.getDynamicSize(idx) + : b.create(loc, op.getStaticSize(idx)); + Value stride = + op.isDynamicStride(idx) + ? op.getDynamicStride(idx) + : b.create(loc, op.getStaticStride(idx)); res.emplace_back(Range{offset, size, stride}); } return res; } -SmallVector SubViewOp::getOrCreateOffsets(OpBuilder &b, - Location loc) { - unsigned dynamicIdx = 1; - return llvm::to_vector<4>(llvm::map_range( - static_offsets().cast(), [&](Attribute a) -> Value { - int64_t staticOffset = a.cast().getInt(); - if (ShapedType::isDynamicStrideOrOffset(staticOffset)) - return getOperand(dynamicIdx++); - else - return b.create(loc, staticOffset); - })); -} - -SmallVector SubViewOp::getOrCreateSizes(OpBuilder &b, Location loc) { - unsigned dynamicIdx = 1 + offsets().size(); - return llvm::to_vector<4>(llvm::map_range( - static_sizes().cast(), [&](Attribute a) -> Value { - int64_t staticSize = a.cast().getInt(); - if (ShapedType::isDynamic(staticSize)) - return getOperand(dynamicIdx++); - else - return b.create(loc, staticSize); - })); -} - -SmallVector SubViewOp::getOrCreateStrides(OpBuilder &b, - Location loc) { - unsigned dynamicIdx = 1 + offsets().size() + sizes().size(); - return llvm::to_vector<4>(llvm::map_range( - static_strides().cast(), [&](Attribute a) -> Value { - int64_t staticStride = a.cast().getInt(); - if (ShapedType::isDynamicStrideOrOffset(staticStride)) - return getOperand(dynamicIdx++); - else - return b.create(loc, staticStride); - })); +SmallVector SubViewOp::getOrCreateRanges(OpBuilder &b, Location loc) { + return ::getOrCreateRangesImpl(*this, b, loc); } -LogicalResult -SubViewOp::getStaticStrides(SmallVectorImpl &staticStrides) { - if (!strides().empty()) - return failure(); - staticStrides = extractFromI64ArrayAttr(static_strides()); - return success(); -} - -Value SubViewOp::getViewSource() { return source(); } - namespace { /// Take a list of `values` with potential new constant to extract and a list @@ -3053,20 +3008,20 @@ class SubViewOpConstantArgumentFolder final SmallVector newOffsets(subViewOp.offsets()); SmallVector newStaticOffsets = extractFromI64ArrayAttr(subViewOp.static_offsets()); - assert(newStaticOffsets.size() == subViewOp.getRank()); + assert(newStaticOffsets.size() == subViewOp.getSourceRank()); canonicalizeSubViewPart(newOffsets, newStaticOffsets, ShapedType::isDynamicStrideOrOffset); SmallVector newSizes(subViewOp.sizes()); SmallVector newStaticSizes = extractFromI64ArrayAttr(subViewOp.static_sizes()); - assert(newStaticOffsets.size() == subViewOp.getRank()); + assert(newStaticOffsets.size() == subViewOp.getSourceRank()); canonicalizeSubViewPart(newSizes, newStaticSizes, ShapedType::isDynamic); SmallVector newStrides(subViewOp.strides()); SmallVector newStaticStrides = extractFromI64ArrayAttr(subViewOp.static_strides()); - assert(newStaticOffsets.size() == subViewOp.getRank()); + assert(newStaticOffsets.size() == subViewOp.getSourceRank()); canonicalizeSubViewPart(newStrides, newStaticStrides, ShapedType::isDynamicStrideOrOffset); @@ -3210,7 +3165,7 @@ class SubViewOpMemRefCastFolder final : public OpRewritePattern { /// Deduce the resultType of the SubViewOp using `inferSubViewResultType` on /// the cast source operand type and the SubViewOp static information. This /// is the resulting type if the MemRefCastOp were folded. - Type resultType = SubViewOp::inferSubViewResultType( + Type resultType = SubViewOp::inferResultType( castOp.source().getType().cast(), extractFromI64ArrayAttr(subViewOp.static_offsets()), extractFromI64ArrayAttr(subViewOp.static_sizes()), @@ -3232,6 +3187,94 @@ void SubViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, context); } +//===----------------------------------------------------------------------===// +// SubTensorOp +//===----------------------------------------------------------------------===// + +static void print(OpAsmPrinter &p, SubTensorOp op) { + return printOpWithOffsetsSizesAndStrides(p, op); +} + +static ParseResult parseSubTensorOp(OpAsmParser &parser, + OperationState &result) { + return parseOpWithOffsetsSizesAndStrides(parser, result); +} + +/// A subtensor result type can be fully inferred from the source type and the +/// static representation of offsets, sizes and strides. Special sentinels +/// encode the dynamic case. +Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides) { + unsigned rank = sourceRankedTensorType.getRank(); + (void)rank; + assert(staticOffsets.size() == rank && + "unexpected staticOffsets size mismatch"); + assert(staticSizes.size() == rank && "unexpected staticSizes size mismatch"); + assert(staticStrides.size() == rank && + "unexpected staticStrides size mismatch"); + return RankedTensorType::get(staticSizes, + sourceRankedTensorType.getElementType()); +} + +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + Value source, ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides, + ValueRange offsets, ValueRange sizes, + ValueRange strides, + ArrayRef attrs) { + auto sourceRankedTensorType = source.getType().cast(); + auto resultType = inferResultType(sourceRankedTensorType, staticOffsets, + staticSizes, staticStrides); + build(b, result, resultType, source, offsets, sizes, strides, + b.getI64ArrayAttr(staticOffsets), b.getI64ArrayAttr(staticSizes), + b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +/// Build a SubTensorOp with all dynamic entries: `staticOffsets`, `staticSizes` +/// and `staticStrides` are automatically filled with sentinel values that +/// encode dynamic entries. +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + Value source, ValueRange offsets, + ValueRange sizes, ValueRange strides, + ArrayRef attrs) { + auto sourceRankedTensorType = source.getType().cast(); + unsigned rank = sourceRankedTensorType.getRank(); + SmallVector staticOffsetsVector( + rank, ShapedType::kDynamicStrideOrOffset); + SmallVector staticSizesVector(rank, ShapedType::kDynamicSize); + SmallVector staticStridesVector( + rank, ShapedType::kDynamicStrideOrOffset); + build(b, result, source, staticOffsetsVector, staticSizesVector, + staticStridesVector, offsets, sizes, strides, attrs); +} + +SmallVector SubTensorOp::getOrCreateRanges(OpBuilder &b, + Location loc) { + return ::getOrCreateRangesImpl(*this, b, loc); +} + +/// Verifier for SubTensorOp. +static LogicalResult verify(SubTensorOp op) { + if (failed(verifyOpWithOffsetSizesAndStrides(op))) + return failure(); + + // Verify result type against inferred type. + auto expectedType = SubTensorOp::inferResultType( + op.getSourceRankedTensorType(), + extractFromI64ArrayAttr(op.static_offsets()), + extractFromI64ArrayAttr(op.static_sizes()), + extractFromI64ArrayAttr(op.static_strides())); + if (!isRankReducedType(expectedType, op.getType())) + return op.emitError("expected result type to be ") + << expectedType << " or a rank-reduced version."; + + return success(); +} + //===----------------------------------------------------------------------===// // TensorCastOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 5e3959af29ddc..72a063ff9d51b 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -900,3 +900,27 @@ func @assume_alignment(%0: memref<4x4xf16>) { assume_alignment %0, 16 : memref<4x4xf16> return } + + +// CHECK-LABEL: func @subtensor({{.*}}) { +func @subtensor(%t: tensor<8x16x4xf32>, %idx : index) { + %c0 = constant 0 : index + %c1 = constant 1 : index + + // CHECK: subtensor + // CHECK-SAME: tensor<8x16x4xf32> to tensor + %1 = subtensor %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1] + : tensor<8x16x4xf32> to tensor + + // CHECK: subtensor + // CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4x4xf32> + %2 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1] + : tensor<8x16x4xf32> to tensor<4x4x4xf32> + + // CHECK: subtensor + // CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4xf32> + %3 = subtensor %t[0, 2, 0][4, 1, 4][1, 1, 1] + : tensor<8x16x4xf32> to tensor<4x4xf32> + + return +} diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index ab18845bdb532..7356c07577dba 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -1255,3 +1255,23 @@ func @imaginary_part_from_incompatible_complex_type(%cplx: complex) { std.re %cplx : complex return } + +// ----- + +func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { + // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>'}} + %0 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1] + : tensor<8x16x4xf32> to tensor + + return +} + +// ----- + +func @subtensor_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) { + // expected-error @+1 {{expected result type to be 'tensor'}} + %0 = subtensor %t[0, 0, 0][%idx, 3, %idx][1, 1, 1] + : tensor<8x16x4xf32> to tensor<4x4x4xf32> + + return +} diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index edcc66c9b6a61..ffb0f92dae99c 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -301,8 +301,7 @@ static void fillPromotionCallBackPatterns(MLIRContext *ctx, template static SmallVector -getGpuProcIds(OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { +getGpuProcIds(OpBuilder &b, Location loc, ArrayRef parallelLoopRanges) { Type indexType = b.getIndexType(); SmallVector procInfo(2); procInfo[0] = {b.create(loc, indexType, b.getStringAttr("y")), From 787bf5e383a32b3ebc87332ff9e868db8f937056 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 2 Oct 2020 05:40:52 -0400 Subject: [PATCH 359/544] [mlir] Add canonicalization for the `subtensor` op Differential revision: https://reviews.llvm.org/D88656 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 2 +- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 60 ++++++++++++------- mlir/test/Transforms/canonicalize.mlir | 29 +++++++++ 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index dbc3e4ca521bf..3d9daee964b60 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -3164,7 +3164,7 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor"> { ArrayRef staticStrides); }]; - // let hasCanonicalizer = 1; + let hasCanonicalizer = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index d684a4b98e556..5548274eee188 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2989,50 +2989,59 @@ void canonicalizeSubViewPart(SmallVectorImpl &values, } } +static void replaceWithNewOp(PatternRewriter &rewriter, SubViewOp op, + SubViewOp newOp) { + rewriter.replaceOpWithNewOp(op, newOp, op.getType()); +} + +static void replaceWithNewOp(PatternRewriter &rewriter, SubTensorOp op, + SubTensorOp newOp) { + rewriter.replaceOpWithNewOp(op, newOp, op.getType()); +} + /// Pattern to rewrite a subview op with constant arguments. -class SubViewOpConstantArgumentFolder final - : public OpRewritePattern { +template +class OpWithOffsetSizesAndStridesConstantArgumentFolder final + : public OpRewritePattern { public: - using OpRewritePattern::OpRewritePattern; + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(SubViewOp subViewOp, + LogicalResult matchAndRewrite(OpType op, PatternRewriter &rewriter) const override { // No constant operand, just return; - if (llvm::none_of(subViewOp.getOperands(), [](Value operand) { + if (llvm::none_of(op.getOperands(), [](Value operand) { return matchPattern(operand, m_ConstantIndex()); })) return failure(); // At least one of offsets/sizes/strides is a new constant. // Form the new list of operands and constant attributes from the existing. - SmallVector newOffsets(subViewOp.offsets()); + SmallVector newOffsets(op.offsets()); SmallVector newStaticOffsets = - extractFromI64ArrayAttr(subViewOp.static_offsets()); - assert(newStaticOffsets.size() == subViewOp.getSourceRank()); + extractFromI64ArrayAttr(op.static_offsets()); + assert(newStaticOffsets.size() == op.getSourceRank()); canonicalizeSubViewPart(newOffsets, newStaticOffsets, ShapedType::isDynamicStrideOrOffset); - SmallVector newSizes(subViewOp.sizes()); + SmallVector newSizes(op.sizes()); SmallVector newStaticSizes = - extractFromI64ArrayAttr(subViewOp.static_sizes()); - assert(newStaticOffsets.size() == subViewOp.getSourceRank()); + extractFromI64ArrayAttr(op.static_sizes()); + assert(newStaticOffsets.size() == op.getSourceRank()); canonicalizeSubViewPart(newSizes, newStaticSizes, ShapedType::isDynamic); - SmallVector newStrides(subViewOp.strides()); + SmallVector newStrides(op.strides()); SmallVector newStaticStrides = - extractFromI64ArrayAttr(subViewOp.static_strides()); - assert(newStaticOffsets.size() == subViewOp.getSourceRank()); + extractFromI64ArrayAttr(op.static_strides()); + assert(newStaticOffsets.size() == op.getSourceRank()); canonicalizeSubViewPart(newStrides, newStaticStrides, ShapedType::isDynamicStrideOrOffset); // Create the new op in canonical form. - auto newSubViewOp = rewriter.create( - subViewOp.getLoc(), subViewOp.source(), newStaticOffsets, - newStaticSizes, newStaticStrides, newOffsets, newSizes, newStrides); + auto newOp = rewriter.create( + op.getLoc(), op.source(), newStaticOffsets, newStaticSizes, + newStaticStrides, newOffsets, newSizes, newStrides); - // Insert a memref_cast for compatibility of the uses of the op. - rewriter.replaceOpWithNewOp(subViewOp, newSubViewOp, - subViewOp.getType()); + replaceWithNewOp(rewriter, op, newOp); return success(); } @@ -3183,8 +3192,8 @@ class SubViewOpMemRefCastFolder final : public OpRewritePattern { void SubViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert( - context); + results.insert, + SubViewOpMemRefCastFolder>(context); } //===----------------------------------------------------------------------===// @@ -3275,6 +3284,13 @@ static LogicalResult verify(SubTensorOp op) { return success(); } +void SubTensorOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results + .insert>( + context); +} + //===----------------------------------------------------------------------===// // TensorCastOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 3603c473a1fd7..dc7be097b0c06 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1110,3 +1110,32 @@ func @tensor_cast_chain_invalid(%input: tensor<4x8xi32>) -> tensor<8x4xi32> { // CHECK-NEXT: return %[[C2]] return %1 : tensor<8x4xi32> } + +// ----- + +// CHECK-LABEL: func @subtensor +// CHECK-SAME: %[[ARG0:[0-9a-z]*]]: index, %[[ARG1:[0-9a-z]*]]: index +func @subtensor(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index) + -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c7 = constant 7 : index + %c11 = constant 11 : index + + // CHECK: subtensor %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] : + // CHECK-SAME: tensor<8x16x4xf32> to tensor<7x11x2xf32> + // CHECK: tensor_cast %{{.*}} : tensor<7x11x2xf32> to tensor + %1 = subtensor %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1] + : tensor<8x16x4xf32> to tensor + + // Test: subtensor with one dynamic operand can also be folded. + // CHECK: subtensor %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] : + // CHECK-SAME: tensor to tensor<2x?x2xf32> + // CHECK: tensor_cast %{{.*}} : tensor<2x?x2xf32> to tensor + %2 = subtensor %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1] + : tensor to tensor + + return %2 : tensor +} From ec07ae2a833ef5b2282811f51fdfbd043c611936 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 11:06:39 +0100 Subject: [PATCH 360/544] [InstCombine] Add some basic vector bswap tests We get the vNi16 cases already via matching as a rotate followed by the fshl -> bswap combines --- llvm/test/Transforms/InstCombine/bswap.ll | 152 ++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 4c6e3dd9a2ff5..18a831a330cbe 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -20,6 +20,31 @@ define i32 @test1(i32 %i) { ret i32 %t12 } +define <2 x i32> @test1_vector(<2 x i32> %i) { +; CHECK-LABEL: @test1_vector( +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[I:%.*]], +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[I]], +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], +; CHECK-NEXT: [[T5:%.*]] = or <2 x i32> [[T1]], [[T4]] +; CHECK-NEXT: [[T7:%.*]] = shl <2 x i32> [[I]], +; CHECK-NEXT: [[T8:%.*]] = and <2 x i32> [[T7]], +; CHECK-NEXT: [[T9:%.*]] = or <2 x i32> [[T5]], [[T8]] +; CHECK-NEXT: [[T11:%.*]] = shl <2 x i32> [[I]], +; CHECK-NEXT: [[T12:%.*]] = or <2 x i32> [[T9]], [[T11]] +; CHECK-NEXT: ret <2 x i32> [[T12]] +; + %t1 = lshr <2 x i32> %i, + %t3 = lshr <2 x i32> %i, + %t4 = and <2 x i32> %t3, + %t5 = or <2 x i32> %t1, %t4 + %t7 = shl <2 x i32> %i, + %t8 = and <2 x i32> %t7, + %t9 = or <2 x i32> %t5, %t8 + %t11 = shl <2 x i32> %i, + %t12 = or <2 x i32> %t9, %t11 + ret <2 x i32> %t12 +} + define i32 @test2(i32 %arg) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[T14:%.*]] = call i32 @llvm.bswap.i32(i32 [[ARG:%.*]]) @@ -37,6 +62,56 @@ define i32 @test2(i32 %arg) { ret i32 %t14 } +define <2 x i32> @test2_vector(<2 x i32> %arg) { +; CHECK-LABEL: @test2_vector( +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], +; CHECK-NEXT: [[T6:%.*]] = or <2 x i32> [[T2]], [[T5]] +; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], +; CHECK-NEXT: [[T10:%.*]] = or <2 x i32> [[T6]], [[T9]] +; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T14:%.*]] = or <2 x i32> [[T10]], [[T12]] +; CHECK-NEXT: ret <2 x i32> [[T14]] +; + %t2 = shl <2 x i32> %arg, + %t4 = shl <2 x i32> %arg, + %t5 = and <2 x i32> %t4, + %t6 = or <2 x i32> %t2, %t5 + %t8 = lshr <2 x i32> %arg, + %t9 = and <2 x i32> %t8, + %t10 = or <2 x i32> %t6, %t9 + %t12 = lshr <2 x i32> %arg, + %t14 = or <2 x i32> %t10, %t12 + ret <2 x i32> %t14 +} + +define <2 x i32> @test2_vector_undef(<2 x i32> %arg) { +; CHECK-LABEL: @test2_vector_undef( +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], +; CHECK-NEXT: [[T6:%.*]] = or <2 x i32> [[T2]], [[T5]] +; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], +; CHECK-NEXT: [[T10:%.*]] = or <2 x i32> [[T6]], [[T9]] +; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T14:%.*]] = or <2 x i32> [[T10]], [[T12]] +; CHECK-NEXT: ret <2 x i32> [[T14]] +; + %t2 = shl <2 x i32> %arg, + %t4 = shl <2 x i32> %arg, + %t5 = and <2 x i32> %t4, + %t6 = or <2 x i32> %t2, %t5 + %t8 = lshr <2 x i32> %arg, + %t9 = and <2 x i32> %t8, + %t10 = or <2 x i32> %t6, %t9 + %t12 = lshr <2 x i32> %arg, + %t14 = or <2 x i32> %t10, %t12 + ret <2 x i32> %t14 +} + define i16 @test3(i16 %s) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[T5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) @@ -48,6 +123,28 @@ define i16 @test3(i16 %s) { ret i16 %t5 } +define <2 x i16> @test3_vector(<2 x i16> %s) { +; CHECK-LABEL: @test3_vector( +; CHECK-NEXT: [[T5:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[S:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t2 = lshr <2 x i16> %s, + %t4 = shl <2 x i16> %s, + %t5 = or <2 x i16> %t2, %t4 + ret <2 x i16> %t5 +} + +define <2 x i16> @test3_vector_undef(<2 x i16> %s) { +; CHECK-LABEL: @test3_vector_undef( +; CHECK-NEXT: [[T5:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[S:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t2 = lshr <2 x i16> %s, + %t4 = shl <2 x i16> %s, + %t5 = or <2 x i16> %t2, %t4 + ret <2 x i16> %t5 +} + define i16 @test4(i16 %s) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[T5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) @@ -59,6 +156,17 @@ define i16 @test4(i16 %s) { ret i16 %t5 } +define <2 x i16> @test4_vector(<2 x i16> %s) { +; CHECK-LABEL: @test4_vector( +; CHECK-NEXT: [[T5:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[S:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t2 = lshr <2 x i16> %s, + %t4 = shl <2 x i16> %s, + %t5 = or <2 x i16> %t4, %t2 + ret <2 x i16> %t5 +} + define i16 @test5(i16 %a) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[T_UPGRD_3:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) @@ -78,6 +186,25 @@ define i16 @test5(i16 %a) { ret i16 %retval } +define <2 x i16> @test5_vector(<2 x i16> %a) { +; CHECK-LABEL: @test5_vector( +; CHECK-NEXT: [[T_UPGRD_3:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[A:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T_UPGRD_3]] +; + %t = zext <2 x i16> %a to <2 x i32> + %t1 = and <2 x i32> %t, + %t2 = ashr <2 x i32> %t1, + %t2.upgrd.1 = trunc <2 x i32> %t2 to <2 x i16> + %t4 = and <2 x i32> %t, + %t5 = shl <2 x i32> %t4, + %t5.upgrd.2 = trunc <2 x i32> %t5 to <2 x i16> + %t.upgrd.3 = or <2 x i16> %t2.upgrd.1, %t5.upgrd.2 + %t6 = bitcast <2 x i16> %t.upgrd.3 to <2 x i16> + %t6.upgrd.4 = zext <2 x i16> %t6 to <2 x i32> + %retval = trunc <2 x i32> %t6.upgrd.4 to <2 x i16> + ret <2 x i16> %retval +} + ; PR2842 define i32 @test6(i32 %x) nounwind readnone { ; CHECK-LABEL: @test6( @@ -96,6 +223,31 @@ define i32 @test6(i32 %x) nounwind readnone { ret i32 %t7 } +define <2 x i32> @test6_vector(<2 x i32> %x) nounwind readnone { +; CHECK-LABEL: @test6_vector( +; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X]], +; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = or <2 x i32> [[X_MASK]], [[T]] +; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T3]], [[T2]] +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i32> [[T4]], +; CHECK-NEXT: [[T6:%.*]] = lshr <2 x i32> [[X]], +; CHECK-NEXT: [[T7:%.*]] = or <2 x i32> [[T5]], [[T6]] +; CHECK-NEXT: ret <2 x i32> [[T7]] +; + %t = shl <2 x i32> %x, + %x.mask = and <2 x i32> %x, + %t1 = lshr <2 x i32> %x, + %t2 = and <2 x i32> %t1, + %t3 = or <2 x i32> %x.mask, %t + %t4 = or <2 x i32> %t3, %t2 + %t5 = shl <2 x i32> %t4, + %t6 = lshr <2 x i32> %x, + %t7 = or <2 x i32> %t5, %t6 + ret <2 x i32> %t7 +} + declare void @extra_use(i32) ; swaphalf = (x << 16 | x >> 16) From 54c03d8f7da72fdf1a9e122391c51c2f0ea7b298 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Fri, 2 Oct 2020 10:12:55 +0200 Subject: [PATCH 361/544] [clangd][lit] Update document-link.test to respect custom resource-dir locations Differential Revision: https://reviews.llvm.org/D88721 --- clang-tools-extra/clangd/test/document-link.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/test/document-link.test b/clang-tools-extra/clangd/test/document-link.test index 7802fe7845687..a4c82e5b2fb6b 100644 --- a/clang-tools-extra/clangd/test/document-link.test +++ b/clang-tools-extra/clangd/test/document-link.test @@ -1,4 +1,5 @@ -# RUN: clangd -lit-test < %s | FileCheck -strict-whitespace %s +# %resource_dir actually points at builtin_include_dir, go up one directory. +# RUN: clangd -lit-test -resource-dir=%resource_dir/.. < %s | FileCheck -strict-whitespace %s {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}} --- {"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"#include \n#include "}}} From cf9503c1b752062d9abfb2c7922a50574d9c5de4 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 2 Oct 2020 06:30:56 -0400 Subject: [PATCH 362/544] [mlir] Add subtensor_insert operation Differential revision: https://reviews.llvm.org/D88657 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 134 +++++++++++++++--- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 117 ++++++++++++--- mlir/test/IR/core-ops.mlir | 19 ++- 3 files changed, 235 insertions(+), 35 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 3d9daee964b60..c62be7571aad7 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2922,15 +2922,20 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< The SubView operation supports the following arguments: - * Memref: the "base" memref on which to create a "view" memref. - * Offsets: memref-rank number of dynamic offsets or static integer - attributes into the "base" memref at which to create the "view" - memref. - * Sizes: memref-rank number of dynamic sizes or static integer attributes - which specify the sizes of the result "view" memref type. - * Strides: memref-rank number of dynamic strides or static integer - attributes that compose multiplicatively with the base memref - strides in each dimension. + * semref: the "base" memref on which to create a "view" memref. + * offsets: memref-rank number of offsets into the "base" memref at which to + create the "view" memref. + * sizes: memref-rank number of sizes which specify the sizes of the result + "view" memref type. + * strides: memref-rank number of strides that compose multiplicatively with + the base memref strides in each dimension. + + The representation based on offsets, sizes and strides support a + partially-static specification via attributes specified through the + `static_offsets`, `static_sizes` and `static_strides` arguments. A special + sentinel value ShapedType::kDynamicSize and + ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has + a static value. A subview operation may additionally reduce the rank of the resulting view by removing dimensions that are statically known to be of size 1. @@ -3076,7 +3081,7 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides< let extraClassDeclaration = extraBaseClassDeclaration # [{ /// Returns the type of the base memref operand. - MemRefType getSourceMemRefType() { + MemRefType getSourceType() { return source().getType().cast(); } @@ -3108,13 +3113,19 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor"> { The subtensor operation supports the following arguments: * tensor: the "base" tensor from which to extract a subtensor. - * offsets: tensor-rank number of dynamic offsets or static integer - attributes into the "base" tensor from which to extract the - subtensor. - * sizes: tensor-rank number of dynamic sizes or static integer attributes - which specify the sizes of the result tensor type. - * strides: tensor-rank number of dynamic strides or static integer - attributes specifying susampling in each dimension. + * offsets: tensor-rank number of offsets into the "base" tensor from which + to extract the subtensor. + * sizes: tensor-rank number of sizes which specify the sizes of the result + tensor type. + * strides: tensor-rank number of strides specifying subsampling in each + dimension. + + The representation based on offsets, sizes and strides support a + partially-static specification via attributes specified through the + `static_offsets`, `static_sizes` and `static_strides` arguments. A special + sentinel value ShapedType::kDynamicSize and + ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has + a static value. After buffer-allocation, the "subtensor" op is expected to lower into a "subview" op. @@ -3144,9 +3155,22 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor"> { ); let results = (outs AnyRankedTensor:$result); + let builders = [ + // Build a SubViewOp with mixed static and dynamic entries. + OpBuilder< + "Value source, ArrayRef staticOffsets, " + "ArrayRef staticSizes, ArrayRef staticStrides, " + "ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}">, + // Build a SubViewOp with all dynamic entries. + OpBuilder< + "Value source, ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}"> + ]; + let extraClassDeclaration = extraBaseClassDeclaration # [{ /// Returns the type of the base tensor operand. - RankedTensorType getSourceRankedTensorType() { + RankedTensorType getSourceType() { return source().getType().cast(); } @@ -3167,6 +3191,80 @@ def SubTensorOp : BaseOpWithOffsetSizesAndStrides<"subtensor"> { let hasCanonicalizer = 1; } +//===----------------------------------------------------------------------===// +// SubTensorInsertOp +//===----------------------------------------------------------------------===// + +def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides<"subtensor_insert"> { + let summary = "subtensor_insert operation"; + let description = [{ + The "subtensor_insert" operation insert a tensor `source` into another + tensor `dest` as specified by the operation's offsets, sizes and strides + arguments. + + It returns a copy of `dest` with the proper subtensor updated with the value + of `source`. + + The subtensor_insert operation has the encodes the following information: + + * source: the tensor that is inserted. + * dest: the tensor into which the source tensor is inserted. + * offsets: tensor-rank number of offsets into the "base" tensor from which + to extract the subtensor. + * sizes: tensor-rank number of sizes which specify the sizes of the result + tensor type. + * strides: tensor-rank number of strides that specify subsampling in each + dimension. + + The representation based on offsets, sizes and strides support a + partially-static specification via attributes specified through the + `static_offsets`, `static_sizes` and `static_strides` arguments. A special + sentinel value ShapedType::kDynamicSize and + ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has + a static value. + + After buffer-allocation, the "subtensor_insert" op is expected to become + an in-place buffer update. + }]; + + let arguments = (ins + AnyRankedTensor:$source, + AnyRankedTensor:$dest, + Variadic:$offsets, + Variadic:$sizes, + Variadic:$strides, + I64ArrayAttr:$static_offsets, + I64ArrayAttr:$static_sizes, + I64ArrayAttr:$static_strides + ); + let results = (outs AnyRankedTensor:$result); + + let builders = [ + // Build a SubViewOp with mixed static and dynamic entries. + OpBuilder< + "Value source, Value dest, ArrayRef staticOffsets, " + "ArrayRef staticSizes, ArrayRef staticStrides, " + "ValueRange offsets, ValueRange sizes, ValueRange strides, " + "ArrayRef attrs = {}">, + // Build a SubViewOp with all dynamic entries. + OpBuilder< + "Value source, Value dest, ValueRange offsets, ValueRange sizes, " + "ValueRange strides, ArrayRef attrs = {}"> + ]; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + /// Returns the type of the base tensor operand. + RankedTensorType getSourceType() { + return source().getType().cast(); + } + + /// The result of a subtensor is always a tensor. + RankedTensorType getType() { + return getResult().getType().cast(); + } + }]; +} + //===----------------------------------------------------------------------===// // TanhOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 5548274eee188..7f4e2ffa5262f 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -23,6 +23,7 @@ #include "mlir/IR/Value.h" #include "mlir/Support/MathExtras.h" #include "mlir/Transforms/InliningUtils.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" @@ -2639,10 +2640,15 @@ Type SubViewOp::inferResultType(MemRefType sourceMemRefType, /// `:` strided-memref-type `to` strided-memref-type /// ``` template -static void printOpWithOffsetsSizesAndStrides(OpAsmPrinter &p, OpType op) { +static void printOpWithOffsetsSizesAndStrides( + OpAsmPrinter &p, OpType op, + llvm::function_ref printExtraOperands = + [](OpAsmPrinter &p, OpType op) {}, + StringLiteral resultTypeKeyword = "to") { int stdDotLen = StandardOpsDialect::getDialectNamespace().size() + 1; p << op.getOperation()->getName().getStringRef().drop_front(stdDotLen) << ' '; - p << op.getOperand(0); + p << op.source(); + printExtraOperands(p, op); printSubViewListOfOperandsOrIntegers(p, op.offsets(), op.static_offsets(), ShapedType::isDynamicStrideOrOffset); printSubViewListOfOperandsOrIntegers(p, op.sizes(), op.static_sizes(), @@ -2651,27 +2657,35 @@ static void printOpWithOffsetsSizesAndStrides(OpAsmPrinter &p, OpType op) { ShapedType::isDynamicStrideOrOffset); p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{OpType::getSpecialAttrNames()}); - p << " : " << op.getOperand(0).getType() << " to " << op.getType(); + p << " : " << op.getSourceType() << " " << resultTypeKeyword << " " + << op.getType(); } static void print(OpAsmPrinter &p, SubViewOp op) { return printOpWithOffsetsSizesAndStrides(p, op); } -/// Parse SubViewOp of the form: +/// Parse of the form: /// ``` -/// `name` ssa-name `[` offset-list `]` `[` size-list `]` `[` stride-list `]` -/// `:` strided-memref-type `to` strided-memref-type +/// `name` ssa-name (extra-operands)? +/// `[` offset-list `]` `[` size-list `]` `[` stride-list `]` +/// `:` strided-memref-type `resultTypeKeyword strided-memref-type /// ``` template -static ParseResult parseOpWithOffsetsSizesAndStrides(OpAsmParser &parser, - OperationState &result) { - OpAsmParser::OperandType srcInfo; +static ParseResult parseOpWithOffsetsSizesAndStrides( + OpAsmParser &parser, OperationState &result, + std::function + parseExtraOperand = nullptr, + StringLiteral resultTypeKeyword = "to") { + OpAsmParser::OperandType srcInfo, dstInfo; SmallVector offsetsInfo, sizesInfo, stridesInfo; auto indexType = parser.getBuilder().getIndexType(); Type srcType, dstType; if (parser.parseOperand(srcInfo)) return failure(); + if (parseExtraOperand && parseExtraOperand(parser, dstInfo)) + return failure(); if (parseListOfOperandsOrIntegers( parser, result, OpType::getStaticOffsetsAttrName(), ShapedType::kDynamicStrideOrOffset, offsetsInfo) || @@ -2683,21 +2697,27 @@ static ParseResult parseOpWithOffsetsSizesAndStrides(OpAsmParser &parser, ShapedType::kDynamicStrideOrOffset, stridesInfo)) return failure(); + // Handle segment sizes. auto b = parser.getBuilder(); - SmallVector segmentSizes{1, static_cast(offsetsInfo.size()), - static_cast(sizesInfo.size()), - static_cast(stridesInfo.size())}; + SmallVector segmentSizes = {1, static_cast(offsetsInfo.size()), + static_cast(sizesInfo.size()), + static_cast(stridesInfo.size())}; + // If we parse an extra operand it needs to appear in the segmentSizes + if (parseExtraOperand) + segmentSizes.insert(segmentSizes.begin(), 1); result.addAttribute(OpType::getOperandSegmentSizeAttr(), b.getI32VectorAttr(segmentSizes)); return failure( parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(srcType) || + parser.parseKeywordType(resultTypeKeyword.str().c_str(), dstType) || parser.resolveOperand(srcInfo, srcType, result.operands) || + (parseExtraOperand && + parser.resolveOperand(dstInfo, dstType, result.operands)) || parser.resolveOperands(offsetsInfo, indexType, result.operands) || parser.resolveOperands(sizesInfo, indexType, result.operands) || parser.resolveOperands(stridesInfo, indexType, result.operands) || - parser.parseKeywordType("to", dstType) || parser.addTypeToList(dstType, result.types)); } @@ -2894,7 +2914,7 @@ static LogicalResult verifyOpWithOffsetSizesAndStrides(OpType op) { /// Verifier for SubViewOp. static LogicalResult verify(SubViewOp op) { - MemRefType baseType = op.getSourceMemRefType(); + MemRefType baseType = op.getSourceType(); MemRefType subViewType = op.getType(); // The base memref and the view memref should be in the same memory space. @@ -3273,8 +3293,7 @@ static LogicalResult verify(SubTensorOp op) { // Verify result type against inferred type. auto expectedType = SubTensorOp::inferResultType( - op.getSourceRankedTensorType(), - extractFromI64ArrayAttr(op.static_offsets()), + op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); if (!isRankReducedType(expectedType, op.getType())) @@ -3291,6 +3310,72 @@ void SubTensorOp::getCanonicalizationPatterns(OwningRewritePatternList &results, context); } +//===----------------------------------------------------------------------===// +// SubTensorInsertOp +//===----------------------------------------------------------------------===// + +static void print(OpAsmPrinter &p, SubTensorInsertOp op) { + return printOpWithOffsetsSizesAndStrides( + p, op, + [](OpAsmPrinter &p, SubTensorInsertOp op) { p << " into " << op.dest(); }, + /*resultTypeKeyword=*/"into"); +} + +static ParseResult parseSubTensorInsertOp(OpAsmParser &parser, + OperationState &result) { + return parseOpWithOffsetsSizesAndStrides( + parser, result, + [](OpAsmParser &parser, OpAsmParser::OperandType &dstInfo) { + return failure(parser.parseKeyword("into") || + parser.parseOperand(dstInfo)); + }, + "into"); +} + +void mlir::SubTensorInsertOp::build( + OpBuilder &b, OperationState &result, Value source, Value dest, + ArrayRef staticOffsets, ArrayRef staticSizes, + ArrayRef staticStrides, ValueRange offsets, ValueRange sizes, + ValueRange strides, ArrayRef attrs) { + build(b, result, dest.getType(), source, dest, offsets, sizes, strides, + b.getI64ArrayAttr(staticOffsets), b.getI64ArrayAttr(staticSizes), + b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +/// Build a SubViewOp with all dynamic entries: `staticOffsets`, `staticSizes` +/// and `staticStrides` are automatically filled with source-memref-rank +/// sentinel values that encode dynamic entries. +void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result, + Value source, Value dest, + ValueRange offsets, ValueRange sizes, + ValueRange strides, + ArrayRef attrs) { + auto sourceRankedTensorType = source.getType().cast(); + unsigned rank = sourceRankedTensorType.getRank(); + SmallVector staticOffsetsVector( + rank, ShapedType::kDynamicStrideOrOffset); + SmallVector staticSizesVector(rank, ShapedType::kDynamicSize); + SmallVector staticStridesVector( + rank, ShapedType::kDynamicStrideOrOffset); + build(b, result, source, dest, staticOffsetsVector, staticSizesVector, + staticStridesVector, offsets, sizes, strides, attrs); +} + +SmallVector SubTensorInsertOp::getOrCreateRanges(OpBuilder &b, + Location loc) { + return ::getOrCreateRangesImpl(*this, b, loc); +} + +/// Verifier for SubViewOp. +static LogicalResult verify(SubTensorInsertOp op) { + if (failed(verifyOpWithOffsetSizesAndStrides(op))) + return failure(); + if (op.getType() != op.dest().getType()) + return op.emitError("expected result type to be ") << op.dest().getType(); + return success(); +} + //===----------------------------------------------------------------------===// // TensorCastOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 72a063ff9d51b..2590dc0105c4e 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -901,7 +901,6 @@ func @assume_alignment(%0: memref<4x4xf16>) { return } - // CHECK-LABEL: func @subtensor({{.*}}) { func @subtensor(%t: tensor<8x16x4xf32>, %idx : index) { %c0 = constant 0 : index @@ -924,3 +923,21 @@ func @subtensor(%t: tensor<8x16x4xf32>, %idx : index) { return } + +// CHECK-LABEL: func @subtensor_insert({{.*}}) { +func @subtensor_insert(%t: tensor<8x16x4xf32>, %t2: tensor<16x32x8xf32>, %idx : index) { + %c0 = constant 0 : index + %c1 = constant 1 : index + + // CHECK: subtensor_insert + // CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32> + %1 = subtensor_insert %t into %t2[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1] + : tensor<8x16x4xf32> into tensor<16x32x8xf32> + + // CHECK: subtensor_insert + // CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32> + %2 = subtensor_insert %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1] + : tensor<8x16x4xf32> into tensor<16x32x8xf32> + + return +} From d4568ed74328a28f79bee0738edf3d065232ced5 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 2 Oct 2020 13:17:26 +0300 Subject: [PATCH 363/544] [MLIR][LLVM] Fixed `topologicalSort()` to iterative version Instead of recursive helper method `topologicalSortImpl()`, sort's implementation is moved to `topologicalSort()` function's body directly. `llvm::ReversePostOrderTraversal` is used to create a traversal of blocks in reverse post order. Reviewed By: kiranchandramohan, rriddle Differential Revision: https://reviews.llvm.org/D88544 --- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 22 +++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 5e393843fcf5e..23f5698b80a2c 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -18,11 +18,13 @@ #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Module.h" +#include "mlir/IR/RegionGraphTraits.h" #include "mlir/IR/StandardTypes.h" #include "mlir/Support/LLVM.h" #include "mlir/Target/LLVMIR/TypeTranslation.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" @@ -360,25 +362,17 @@ connectPHINodes(T &func, const DenseMap &valueMapping, } } -// TODO: implement an iterative version -static void topologicalSortImpl(llvm::SetVector &blocks, Block *b) { - blocks.insert(b); - for (Block *bb : b->getSuccessors()) { - if (blocks.count(bb) == 0) - topologicalSortImpl(blocks, bb); - } -} - /// Sort function blocks topologically. template static llvm::SetVector topologicalSort(T &f) { - // For each blocks that has not been visited yet (i.e. that has no - // predecessors), add it to the list and traverse its successors in DFS - // preorder. + // For each block that has not been visited yet (i.e. that has no + // predecessors), add it to the list as well as its successors. llvm::SetVector blocks; for (Block &b : f) { - if (blocks.count(&b) == 0) - topologicalSortImpl(blocks, &b); + if (blocks.count(&b) == 0) { + llvm::ReversePostOrderTraversal traversal(&b); + blocks.insert(traversal.begin(), traversal.end()); + } } assert(blocks.size() == f.getBlocks().size() && "some blocks are not sorted"); From 8ae1369f794c1e6da6aaf1b540e3c98d1e8a16c4 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Fri, 2 Oct 2020 17:46:29 +0700 Subject: [PATCH 364/544] [GVN LoadPRE] Add test to show an opportunty. We can use context to prove that load can be safely executed at a point where load is being hoisted. --- llvm/test/Transforms/GVN/loadpre-context.ll | 144 ++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 llvm/test/Transforms/GVN/loadpre-context.ll diff --git a/llvm/test/Transforms/GVN/loadpre-context.ll b/llvm/test/Transforms/GVN/loadpre-context.ll new file mode 100644 index 0000000000000..8c9c212128429 --- /dev/null +++ b/llvm/test/Transforms/GVN/loadpre-context.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -gvn --basic-aa -S | FileCheck %s + +; load may be speculated, adress is not null using context search. +; There is a critical edge. +define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) { +; CHECK-LABEL: @loadpre_critical_edge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null +; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] +; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]]) +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]] +; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[SUM]] +; CHECK: null_exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %cmp = icmp eq i32* %arg, null + br i1 %cmp, label %null_exit, label %header + +header: + %iv = phi i32 [0, %entry], [%iv.next, %header] + %new_v = call i32 @foo(i32 %iv) + %v = load i32, i32* %arg + %sum = add i32 %new_v, %v + store i32 %sum, i32* %arg + %iv.next = add i32 %iv, 1 + %cond = icmp eq i32 %iv.next, %N + br i1 %cond, label %exit, label %header + +exit: + ret i32 %sum + +null_exit: + ret i32 0 +} + +; load may be speculated, adress is not null using context search. +define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) { +; CHECK-LABEL: @loadpre_basic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null +; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] +; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]]) +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]] +; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[SUM]] +; CHECK: null_exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %cmp = icmp eq i32* %arg, null + br i1 %cmp, label %null_exit, label %preheader + +preheader: + br label %header + +header: + %iv = phi i32 [0, %preheader], [%iv.next, %header] + %new_v = call i32 @foo(i32 %iv) + %v = load i32, i32* %arg + %sum = add i32 %new_v, %v + store i32 %sum, i32* %arg + %iv.next = add i32 %iv, 1 + %cond = icmp eq i32 %iv.next, %N + br i1 %cond, label %exit, label %header + +exit: + ret i32 %sum + +null_exit: + ret i32 0 +} + +; load cannot be speculated, adress is not null check does not dominate the loop. +define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) { +; CHECK-LABEL: @loadpre_maybe_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[NULL_CHECK:%.*]], label [[PREHEADER:%.*]] +; CHECK: null_check: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null +; CHECK-NEXT: br i1 [[CMP]], label [[NULL_EXIT:%.*]], label [[PREHEADER]] +; CHECK: preheader: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[HEADER]] ] +; CHECK-NEXT: [[NEW_V:%.*]] = call i32 @foo(i32 [[IV]]) +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[NEW_V]], [[V]] +; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[SUM]] +; CHECK: null_exit: +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %c, label %null_check, label %preheader + +null_check: + %cmp = icmp eq i32* %arg, null + br i1 %cmp, label %null_exit, label %preheader + +preheader: + br label %header + +header: + %iv = phi i32 [0, %preheader], [%iv.next, %header] + %new_v = call i32 @foo(i32 %iv) + %v = load i32, i32* %arg + %sum = add i32 %new_v, %v + store i32 %sum, i32* %arg + %iv.next = add i32 %iv, 1 + %cond = icmp eq i32 %iv.next, %N + br i1 %cond, label %exit, label %header + +exit: + ret i32 %sum + +null_exit: + ret i32 0 +} + +; Does not guarantee that returns. +declare i32 @foo(i32) readnone From 6481a764950055a08a5b8e0ba728e7f7299f932c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Sep 2020 09:46:57 +0100 Subject: [PATCH 365/544] [PhaseOrdering] Add test that requires peeling before vectorization. Test case for PR47671. --- .../peel-before-lv-to-enable-vectorization.ll | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll new file mode 100644 index 0000000000000..a87a023bebed6 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll @@ -0,0 +1,42 @@ +; RUN: opt -O2 -S %s | FileCheck %s +; RUN: opt -passes='default' -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; The loop below needs to be peeled first to eliminate the constant PHI %first +; before loop vectorization. +; +; Test case from PR47671. + +define i32 @test(i32* readonly %p, i32* readnone %q) { +; CHECK-LABEL: define i32 @test( +; CHECK-NOT: vector.body +; +entry: + %cmp.not7 = icmp eq i32* %p, %q + br i1 %cmp.not7, label %exit, label %loop.ph + +loop.ph: + br label %loop + +loop: + %sum = phi i32 [ %sum.next, %loop ], [ 0, %loop.ph ] + %first = phi i1 [ false, %loop ], [ true, %loop.ph ] + %iv = phi i32* [ %iv.next, %loop ], [ %p, %loop.ph ] + %add = add nsw i32 %sum, 2 + %spec.select = select i1 %first, i32 %sum, i32 %add + %lv = load i32, i32* %iv, align 4 + %sum.next = add nsw i32 %lv, %spec.select + %iv.next = getelementptr inbounds i32, i32* %iv, i64 1 + %cmp.not = icmp eq i32* %iv.next, %q + br i1 %cmp.not, label %loopexit, label %loop + +loopexit: + %sum.next.lcssa = phi i32 [ %sum.next, %loop ] + br label %exit + +exit: + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.next.lcssa, %loopexit ] + ret i32 %sum.0.lcssa +} From 57ac47d78885c9a3d712692b1476d99840591db1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Oct 2020 12:18:31 +0200 Subject: [PATCH 366/544] [clangd] Make PopulateSwitch a fix. It fixes the -Wswitch warning, though we mark it as a fix even if that is off. This makes it the "recommended" action on an incomplete switch, which seems OK. Differential Revision: https://reviews.llvm.org/D88726 --- clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp index 12a6e49a16843..a8ad905966816 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp @@ -53,7 +53,7 @@ class PopulateSwitch : public Tweak { Expected apply(const Selection &Sel) override; std::string title() const override { return "Populate switch"; } llvm::StringLiteral kind() const override { - return CodeAction::REFACTOR_KIND; + return CodeAction::QUICKFIX_KIND; } private: From 8825fec37e73eea1bc3e4f5c125e1fd02d002d6c Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 1 Oct 2020 15:47:31 +0100 Subject: [PATCH 367/544] [AArch64] Add CPU Cortex-R82 This adds support for -mcpu=cortex-r82. Some more information about this core can be found here: https://www.arm.com/products/silicon-ip-cpu/cortex-r/cortex-r82 One note about the system register: that is a bit of a refactoring because of small differences between v8.4-A AArch64 and v8-R AArch64. This is based on patches from Mark Murray and Mikhail Maltsev. Differential Revision: https://reviews.llvm.org/D88660 --- clang/lib/Basic/Targets/AArch64.cpp | 2 ++ clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 3 +- clang/test/Driver/aarch64-cpus.c | 3 ++ clang/test/Driver/aarch64-dotprod.c | 1 + .../Preprocessor/aarch64-target-features.c | 2 ++ .../llvm/Support/AArch64TargetParser.def | 10 ++++++ llvm/lib/Support/AArch64TargetParser.cpp | 2 ++ llvm/lib/Target/AArch64/AArch64.td | 33 +++++++++++++++++-- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + llvm/lib/Target/AArch64/AArch64Subtarget.h | 6 ++++ .../Target/AArch64/AArch64SystemOperands.td | 9 ++++- .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 + llvm/unittests/Support/TargetParserTest.cpp | 10 +++++- 13 files changed, 77 insertions(+), 6 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 7f0a0f0d86dc1..e25a783cfa667 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -481,6 +481,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, ArchKind = llvm::AArch64::ArchKind::ARMV8_5A; if (Feature == "+v8.6a") ArchKind = llvm::AArch64::ArchKind::ARMV8_6A; + if (Feature == "+v8r") + ArchKind = llvm::AArch64::ArchKind::ARMV8R; if (Feature == "+fullfp16") HasFullFP16 = true; if (Feature == "+dotprod") diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 6c5e43704cc49..fe742b4bcfcda 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -306,7 +306,8 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, NoCrypto = true; } - if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd) { + if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd || + std::find(ItBegin, ItEnd, "+v8r") != ItEnd) { if (HasCrypto && !NoCrypto) { // Check if we have NOT disabled an algorithm with something like: // +crypto, -algorithm diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index f39241bee8a6d..356674e7a7074 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -178,6 +178,9 @@ // RUN: %clang -target aarch64 -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXA78 %s // CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78" +// RUN: %clang -target aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXR82 %s +// CORTEXR82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-r82" + // RUN: %clang -target aarch64_be -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s // RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s diff --git a/clang/test/Driver/aarch64-dotprod.c b/clang/test/Driver/aarch64-dotprod.c index a6d0c9c4e1cee..3ca79d54daa7c 100644 --- a/clang/test/Driver/aarch64-dotprod.c +++ b/clang/test/Driver/aarch64-dotprod.c @@ -9,4 +9,5 @@ // RUN: %clang -### -target aarch64 -mcpu=cortex-a75 %s 2>&1 | FileCheck %s // RUN: %clang -### -target aarch64 -mcpu=cortex-a76 %s 2>&1 | FileCheck %s // RUN: %clang -### -target aarch64 -mcpu=cortex-a55 %s 2>&1 | FileCheck %s +// RUN: %clang -### -target aarch64 -mcpu=cortex-r82 %s 2>&1 | FileCheck %s // CHECK: "+dotprod" diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index cb137eea072e6..ad84ba93ccf3c 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -219,6 +219,7 @@ // RUN: %clang -target aarch64 -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A57 %s // RUN: %clang -target aarch64 -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A72 %s // RUN: %clang -target aarch64 -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-CORTEX-A73 %s +// RUN: %clang -target aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-CORTEX-R82 %s // RUN: %clang -target aarch64 -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M1 %s // RUN: %clang -target aarch64 -mcpu=exynos-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M4 %s // RUN: %clang -target aarch64 -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M4 %s @@ -237,6 +238,7 @@ // CHECK-MCPU-A57: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" // CHECK-MCPU-A72: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" // CHECK-MCPU-CORTEX-A73: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" +// CHECK-MCPU-CORTEX-R82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8r" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+dotprod" "-target-feature" "+fp16fml" "-target-feature" "+ras" "-target-feature" "+rdm" "-target-feature" "+rcpc" "-target-feature" "+fullfp16" "-target-feature" "+sm4" "-target-feature" "+sha3" "-target-feature" "+sha2" "-target-feature" "+aes" // CHECK-MCPU-M1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" // CHECK-MCPU-M4: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+dotprod" "-target-feature" "+fullfp16" // CHECK-MCPU-KRYO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 13b7cfc4b5cd9..e6bc1a2c5ff85 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -51,6 +51,14 @@ AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a", AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM)) +AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r", + ARMBuildAttrs::CPUArch::v8_R, FK_CRYPTO_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS | + AArch64::AEK_CRYPTO | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | + AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_DOTPROD | + AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_FP16 | + AArch64::AEK_FP16FML | AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_SB)) #undef AARCH64_ARCH #ifndef AARCH64_ARCH_EXT_NAME @@ -130,6 +138,8 @@ AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_NONE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp index 82f770766d9be..49e123a5974ed 100644 --- a/llvm/lib/Support/AArch64TargetParser.cpp +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -118,6 +118,8 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK, Features.push_back("+v8.5a"); if (AK == AArch64::ArchKind::ARMV8_6A) Features.push_back("+v8.6a"); + if(AK == AArch64::ArchKind::ARMV8R) + Features.push_back("+v8r"); return AK != ArchKind::INVALID; } diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 4f4bd9bbd98fb..f024e26ea788f 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -72,9 +72,11 @@ def FeatureLOR : SubtargetFeature< "lor", "HasLOR", "true", "Enables ARM v8.1 Limited Ordering Regions extension">; -def FeatureVH : SubtargetFeature< - "vh", "HasVH", "true", - "Enables ARM v8.1 Virtual Host extension">; +def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", + "true", "Enable RW operand CONTEXTIDR_EL2" >; + +def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", + "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable ARMv8 PMUv3 Performance Monitors extension">; @@ -441,6 +443,22 @@ def HasV8_6aOps : SubtargetFeature< [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; +def HasV8_0rOps : SubtargetFeature< + "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", + [//v8.1 + FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, + //v8.2 + FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4, + FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV, + //v8.3 + FeatureComplxNum, FeatureCCIDX, FeatureJS, + FeaturePA, FeatureRCPC, + //v8.4 + FeatureDotProd, FeatureFP16FML, FeatureRASv8_4, FeatureTRACEV8_4, + FeatureTLB_RMI, FeatureFMI, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + //v8.5 + FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -506,6 +524,7 @@ def PAUnsupported : AArch64Unsupported { } include "AArch64SchedA53.td" +include "AArch64SchedA55.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" include "AArch64SchedFalkor.td" @@ -652,6 +671,13 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", FeatureSSBS, FeatureDotProd]>; +def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", + "CortexR82", + "Cortex-R82 ARM Processors", [ + // All features are implied by v8_0r ops: + HasV8_0rOps, + ]>; + def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", [ HasV8_2aOps, @@ -1013,6 +1039,7 @@ def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>; def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>; def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>; def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>; +def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>; def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 7d0b61d649334..a389bfbacd3c3 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -103,6 +103,7 @@ void AArch64Subtarget::initializeProperties() { case CortexA76: case CortexA77: case CortexA78: + case CortexR82: case CortexX1: PrefFunctionLogAlignment = 4; break; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 80b91af3d8d82..135dee0cdded9 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -57,6 +57,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { CortexA76, CortexA77, CortexA78, + CortexR82, CortexX1, ExynosM3, Falkor, @@ -84,6 +85,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasV8_5aOps = false; bool HasV8_6aOps = false; + bool HasV8_0rOps = false; + bool HasCONTEXTIDREL2 = false; + bool HasFPARMv8 = false; bool HasNEON = false; bool HasCrypto = false; @@ -306,6 +310,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasV8_3aOps() const { return HasV8_3aOps; } bool hasV8_4aOps() const { return HasV8_4aOps; } bool hasV8_5aOps() const { return HasV8_5aOps; } + bool hasV8_0rOps() const { return HasV8_0rOps; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } @@ -343,6 +348,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasSHA3() const { return HasSHA3; } bool hasSHA2() const { return HasSHA2; } bool hasAES() const { return HasAES; } + bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; } bool balanceFPOps() const { return BalanceFPOps; } bool predictableSelectIsExpensive() const { return PredictableSelectIsExpensive; diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index ceceabc6ff4ed..76117012731b1 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -32,6 +32,11 @@ def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">, AssemblerPredicate<(all_of FeaturePAN_RWV), "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">; +def HasCONTEXTIDREL2 + : Predicate<"Subtarget->hasCONTEXTIDREL2()">, + AssemblerPredicate<(all_of FeatureCONTEXTIDREL2), + "Target contains CONTEXTIDR_EL2 RW operand">; + //===----------------------------------------------------------------------===// // AT (address translate) instruction options. //===----------------------------------------------------------------------===// @@ -1220,7 +1225,6 @@ def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>; // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::FeatureVH} }] in { def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>; -def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>; def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>; def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>; @@ -1246,6 +1250,9 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>; def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>; def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>; def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>; +let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in { + def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; +} } // v8.2a registers // Op0 Op1 CRn CRm Op2 diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 502966c633676..6cc841d37dba1 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -5251,6 +5251,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, case AArch64::ArchKind::ARMV8_4A: case AArch64::ArchKind::ARMV8_5A: case AArch64::ArchKind::ARMV8_6A: + case AArch64::ArchKind::ARMV8R: RequestedExtensions.push_back("sm4"); RequestedExtensions.push_back("sha3"); RequestedExtensions.push_back("sha2"); diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index bec8a395f5586..4b167dc67e059 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -881,6 +881,14 @@ TEST(TargetParserTest, testAArch64CPU) { AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A")); + EXPECT_TRUE(testAArch64CPU( + "cortex-r82", "armv8-r", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS | + AArch64::AEK_CRYPTO | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | + AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_DOTPROD | + AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_FP16 | + AArch64::AEK_FP16FML | AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_SB, "8-R")); EXPECT_TRUE(testAArch64CPU( "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8", AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | @@ -1026,7 +1034,7 @@ TEST(TargetParserTest, testAArch64CPU) { "8.2-A")); } -static constexpr unsigned NumAArch64CPUArchs = 42; +static constexpr unsigned NumAArch64CPUArchs = 43; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector List; From 53fb9d062b42e4d46c88766aaac8fb88bab8fb77 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 13:19:02 +0100 Subject: [PATCH 368/544] [InstCombine] Add partial bswap vector test from D88578 --- llvm/test/Transforms/InstCombine/bswap.ll | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 18a831a330cbe..14b8ea7ecf2a0 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -519,6 +519,28 @@ define i32 @partial_bswap(i32 %x) { } declare i32 @llvm.bswap.i32(i32) +define <2 x i32> @partial_bswap_vector(<2 x i32> %x) { +; CHECK-LABEL: @partial_bswap_vector( +; CHECK-NEXT: [[X3:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[A2:%.*]] = shl <2 x i32> [[X]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i32> [[A2]], +; CHECK-NEXT: [[X32:%.*]] = or <2 x i32> [[X3]], [[X2]] +; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[T1]]) +; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[X32]], [[T2]] +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %x3 = shl <2 x i32> %x, + %a2 = shl <2 x i32> %x, + %x2 = and <2 x i32> %a2, + %x32 = or <2 x i32> %x3, %x2 + %t1 = and <2 x i32> %x, + %t2 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %t1) + %r = or <2 x i32> %x32, %t2 + ret <2 x i32> %r +} +declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) + define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 From bb448a248371b48dbca8d647b7aaf9393154cf3d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Oct 2020 13:31:23 +0100 Subject: [PATCH 369/544] [SLP] Add test where reduction result is used in PHI. Test case for PR47670. --- .../SLPVectorizer/X86/horizontal.ll | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index fb7a0b5bedc7f..703ebbf9dd732 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -1626,4 +1626,65 @@ normal: ret void } +; Test case from PR47670. Reduction result is used as incoming value in phi. +define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) { +; CHECK-LABEL: @reduction_result_used_in_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[DATA:%.*]], align 4 +; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[IDX_1]], align 4 +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[L_1]], [[L_0]] +; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[IDX_2]], align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[L_2]], [[ADD_1]] +; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 +; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[IDX_3]], align 4 +; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[L_3]], [[ADD_2]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_3]], [[BB]] ] +; CHECK-NEXT: ret i32 [[SUM_1]] +; +; STORE-LABEL: @reduction_result_used_in_phi( +; STORE-NEXT: entry: +; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] +; STORE: bb: +; STORE-NEXT: [[L_0:%.*]] = load i32, i32* [[DATA:%.*]], align 4 +; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; STORE-NEXT: [[L_1:%.*]] = load i32, i32* [[IDX_1]], align 4 +; STORE-NEXT: [[ADD_1:%.*]] = add i32 [[L_1]], [[L_0]] +; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; STORE-NEXT: [[L_2:%.*]] = load i32, i32* [[IDX_2]], align 4 +; STORE-NEXT: [[ADD_2:%.*]] = add i32 [[L_2]], [[ADD_1]] +; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3 +; STORE-NEXT: [[L_3:%.*]] = load i32, i32* [[IDX_3]], align 4 +; STORE-NEXT: [[ADD_3:%.*]] = add i32 [[L_3]], [[ADD_2]] +; STORE-NEXT: br label [[EXIT]] +; STORE: exit: +; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_3]], [[BB]] ] +; STORE-NEXT: ret i32 [[SUM_1]] +; +entry: + br i1 %b, label %bb, label %exit + +bb: + %l.0 = load i32, i32* %data, align 4 + %idx.1 = getelementptr inbounds i32, i32* %data, i64 1 + %l.1 = load i32, i32* %idx.1, align 4 + %add.1 = add i32 %l.1, %l.0 + %idx.2 = getelementptr inbounds i32, i32* %data, i64 2 + %l.2 = load i32, i32* %idx.2, align 4 + %add.2 = add i32 %l.2, %add.1 + %idx.3 = getelementptr inbounds i32, i32* %data, i64 3 + %l.3 = load i32, i32* %idx.3, align 4 + %add.3 = add i32 %l.3, %add.2 + br label %exit + +exit: + %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb] + ret i32 %sum.1 +} + declare i32 @__gxx_personality_v0(...) From 71b89b14934d99f32da150f62042aa9e09113b5f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 13:53:21 +0100 Subject: [PATCH 370/544] LoopAccessAnalysis.cpp - use const reference in for-range loops. NFCI. --- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 4d3de1e404c3e..9d740602a1e80 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -730,7 +730,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // First, count how many write and read accesses are in the alias set. Also // collect MemAccessInfos for later. SmallVector AccessInfos; - for (auto A : AS) { + for (const auto &A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); @@ -864,7 +864,7 @@ void AccessAnalysis::processMemAccesses() { // compatibility and potential for underlying-object overlap. As a result, we // only need to check for potential pointer dependencies within each alias // set. - for (auto &AS : AST) { + for (const auto &AS : AST) { // Note that both the alias-set tracker and the alias sets themselves used // linked lists internally and so the iteration order here is deterministic // (matching the original instruction order within each set). @@ -884,12 +884,12 @@ void AccessAnalysis::processMemAccesses() { bool UseDeferred = SetIteration > 0; PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; - for (auto AV : AS) { + for (const auto &AV : AS) { Value *Ptr = AV.getValue(); // For a single memory access in AliasSetTracker, Accesses may contain // both read and write, and they both need to be handled for CheckDeps. - for (auto AC : S) { + for (const auto &AC : S) { if (AC.getPointer() != Ptr) continue; From 4edd74a1986f5e347a1f2e060df7f6372938fa9e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 13:56:13 +0100 Subject: [PATCH 371/544] BlockFrequencyInfoImpl.h - use const references to avoid FrequencyData copies. NFCI. --- llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index e6ddaa7ef31ef..c227875311177 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1449,8 +1449,8 @@ void BlockFrequencyInfoImpl::verifyMatch( BlockNode Node = Entry.second; if (OtherValidNodes.count(BB)) { BlockNode OtherNode = OtherValidNodes[BB]; - auto Freq = Freqs[Node.Index]; - auto OtherFreq = Other.Freqs[OtherNode.Index]; + const auto &Freq = Freqs[Node.Index]; + const auto &OtherFreq = Other.Freqs[OtherNode.Index]; if (Freq.Integer != OtherFreq.Integer) { Match = false; dbgs() << "Freq mismatch: " << bfi_detail::getBlockName(BB) << " " From c7d4aa711ab7981358b5e17e56f1fb6f7f585ac1 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 30 Sep 2020 20:05:41 -0400 Subject: [PATCH 372/544] [libc++] Move the weak symbols list to libc++abi Those symbols are exported from libc++abi in the first place, so it makes more sense to have them there. --- libcxx/lib/weak.exp | 16 ---------------- libcxx/src/CMakeLists.txt | 3 +-- libcxxabi/lib/weak.exp | 7 +++++++ libcxxabi/src/CMakeLists.txt | 2 ++ 4 files changed, 10 insertions(+), 18 deletions(-) delete mode 100644 libcxx/lib/weak.exp create mode 100644 libcxxabi/lib/weak.exp diff --git a/libcxx/lib/weak.exp b/libcxx/lib/weak.exp deleted file mode 100644 index 6bdcc0578460d..0000000000000 --- a/libcxx/lib/weak.exp +++ /dev/null @@ -1,16 +0,0 @@ -__ZTISt10bad_typeid -__ZTISt11logic_error -__ZTISt11range_error -__ZTISt12domain_error -__ZTISt12length_error -__ZTISt12out_of_range -__ZTISt13bad_exception -__ZTISt13runtime_error -__ZTISt14overflow_error -__ZTISt15underflow_error -__ZTISt16invalid_argument -__ZTISt16nested_exception -__ZTISt20bad_array_new_length -__ZTISt8bad_cast -__ZTISt9bad_alloc -__ZTISt9exception diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 0e6819369ffa1..98a374c2bf9f5 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -210,8 +210,7 @@ if (LIBCXX_ENABLE_SHARED) target_link_libraries(cxx_shared PRIVATE "-Wl,-unexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++unexp.exp" "-Wl,-reexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++abi.v${LIBCXX_LIBCPPABI_VERSION}.exp" - "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp" - "-Wl,-force_symbols_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/weak.exp") + "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp") if (NOT LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS) target_link_libraries(cxx_shared PRIVATE diff --git a/libcxxabi/lib/weak.exp b/libcxxabi/lib/weak.exp new file mode 100644 index 0000000000000..784ca45decc19 --- /dev/null +++ b/libcxxabi/lib/weak.exp @@ -0,0 +1,7 @@ +__ZTISt11range_error +__ZTISt12domain_error +__ZTISt12length_error +__ZTISt12out_of_range +__ZTISt14overflow_error +__ZTISt15underflow_error +__ZTISt16invalid_argument diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index c57d6fa83aa0f..58d04e6578e31 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -214,6 +214,8 @@ if (LIBCXXABI_ENABLE_SHARED) export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/itanium-base.exp") + target_link_libraries(cxxabi_shared PRIVATE "-Wl,-force_symbols_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/weak.exp") + if (LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS) export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/new-delete.exp") endif() From 55b97a6d2a7459dd5bdc78b199c05525302137c2 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Fri, 2 Oct 2020 09:36:11 -0400 Subject: [PATCH 373/544] [LLD][COFF] Add more type record information to /summary This adds the following two new lines to /summary: 21351 Input OBJ files (expanded from all cmd-line inputs) 61 PDB type server dependencies 38 Precomp OBJ dependencies 1420669231 Input type records <<<< 78665073382 Input type records bytes <<<< 8801393 Merged TPI records 3177158 Merged IPI records 59194 Output PDB strings 71576766 Global symbol records 25416935 Module symbol records 2103431 Public symbol records Differential Revision: https://reviews.llvm.org/D88703 --- lld/COFF/DebugTypes.cpp | 27 +++++++++++++++++++++++ lld/COFF/DebugTypes.h | 3 +++ lld/COFF/PDB.cpp | 11 +++++++++ lld/test/COFF/pdb-type-server-simple.test | 2 ++ lld/test/COFF/precomp-link.test | 6 +++-- lld/test/COFF/precomp-summary-fail.test | 2 ++ 6 files changed, 49 insertions(+), 2 deletions(-) diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index baec05d1e87a6..febbd19084dac 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -332,6 +332,8 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { ipiMap = indexMapStorage; if (config->showSummary) { + nbTypeRecords = indexMapStorage.size() - nbHeadIndices; + nbTypeRecordsBytes = reader.getLength(); // Count how many times we saw each type record in our input. This // calculation requires a second pass over the type records to classify each // record as a type or index. This is slow, but this code executes when @@ -386,6 +388,12 @@ Error TypeServerSource::mergeDebugT(TypeMerger *m) { } if (config->showSummary) { + nbTypeRecords = tpiMap.size() + ipiMap.size(); + nbTypeRecordsBytes = + expectedTpi->typeArray().getUnderlyingStream().getLength() + + (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength() + : 0); + // Count how many times we saw each type record in our input. If a // destination type index is present in the source to destination type index // map, that means we saw it once in the input. Add it to our histogram. @@ -693,6 +701,11 @@ void TpiSource::remapTpiWithGHashes(GHashState *g) { ipiMap = indexMapStorage; mergeUniqueTypeRecords(file->debugTypes); // TODO: Free all unneeded ghash resources now that we have a full index map. + + if (config->showSummary) { + nbTypeRecords = ghashes.size(); + nbTypeRecordsBytes = file->debugTypes.size(); + } } // PDBs do not actually store global hashes, so when merging a type server @@ -759,6 +772,16 @@ void TypeServerSource::remapTpiWithGHashes(GHashState *g) { ipiSrc->tpiMap = tpiMap; ipiSrc->ipiMap = ipiMap; ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray())); + + if (config->showSummary) { + nbTypeRecords = ipiSrc->ghashes.size(); + nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength(); + } + } + + if (config->showSummary) { + nbTypeRecords += ghashes.size(); + nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength(); } } @@ -834,6 +857,10 @@ void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { mergeUniqueTypeRecords(file->debugTypes, TypeIndex(precompDependency.getStartTypeIndex() + precompDependency.getTypesCount())); + if (config->showSummary) { + nbTypeRecords = ghashes.size(); + nbTypeRecordsBytes = file->debugTypes.size(); + } } namespace { diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h index ebb3b2bac6930..42485df06f873 100644 --- a/lld/COFF/DebugTypes.h +++ b/lld/COFF/DebugTypes.h @@ -182,6 +182,9 @@ class TpiSource { MergedInfo mergedTpi; MergedInfo mergedIpi; + + uint64_t nbTypeRecords = 0; + uint64_t nbTypeRecordsBytes = 0; }; TpiSource *makeTpiSource(ObjFile *file); diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index ae2dc9afca280..846d7a11fbfa9 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -146,6 +146,8 @@ class PDBLinker { uint64_t globalSymbols = 0; uint64_t moduleSymbols = 0; uint64_t publicSymbols = 0; + uint64_t nbTypeRecords = 0; + uint64_t nbTypeRecordsBytes = 0; }; class DebugSHandler { @@ -970,6 +972,13 @@ void PDBLinker::addObjectsToPDB() { addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); } t2.stop(); + + if (config->showSummary) { + for_each(TpiSource::instances, [&](TpiSource *source) { + nbTypeRecords += source->nbTypeRecords; + nbTypeRecordsBytes += source->nbTypeRecordsBytes; + }); + } } void PDBLinker::addPublicsToPDB() { @@ -1009,6 +1018,8 @@ void PDBLinker::printStats() { "Input OBJ files (expanded from all cmd-line inputs)"); print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies"); print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies"); + print(nbTypeRecords, "Input type records"); + print(nbTypeRecordsBytes, "Input type records bytes"); print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records"); print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records"); print(pdbStrTab.size(), "Output PDB strings"); diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test index b954712d9b6c3..7c72d8fe86690 100644 --- a/lld/test/COFF/pdb-type-server-simple.test +++ b/lld/test/COFF/pdb-type-server-simple.test @@ -105,6 +105,8 @@ SUMMARY-NEXT: ------------------------------------------------------------------ SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 1 PDB type server dependencies SUMMARY-NEXT: 0 Precomp OBJ dependencies +SUMMARY-NEXT: 25 Input type records +SUMMARY-NEXT: 868 Input type records bytes SUMMARY-NEXT: 9 Merged TPI records SUMMARY-NEXT: 16 Merged IPI records SUMMARY-NEXT: 3 Output PDB strings diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test index 161ee88d27f5e..c9a5c605da8f0 100644 --- a/lld/test/COFF/precomp-link.test +++ b/lld/test/COFF/precomp-link.test @@ -64,8 +64,10 @@ SUMMARY-NEXT: ------------------------------------------------------------------ SUMMARY-NEXT: 3 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 0 PDB type server dependencies SUMMARY-NEXT: 1 Precomp OBJ dependencies -SUMMARY-NEXT: 874 Merged TPI records -SUMMARY-NEXT: 170 Merged IPI records +SUMMARY-NEXT: 1066 Input type records +SUMMARY-NEXT: 55968 Input type records bytes +SUMMARY-NEXT: 874 Merged TPI records +SUMMARY-NEXT: 170 Merged IPI records SUMMARY-NEXT: 5 Output PDB strings SUMMARY-NEXT: 167 Global symbol records SUMMARY-NEXT: 20 Module symbol records diff --git a/lld/test/COFF/precomp-summary-fail.test b/lld/test/COFF/precomp-summary-fail.test index b689839be9d22..5ebba9a1d3c74 100644 --- a/lld/test/COFF/precomp-summary-fail.test +++ b/lld/test/COFF/precomp-summary-fail.test @@ -14,6 +14,8 @@ SUMMARY-NEXT: ------------------------------------------------------------------ SUMMARY-NEXT: 2 Input OBJ files (expanded from all cmd-line inputs) SUMMARY-NEXT: 0 PDB type server dependencies SUMMARY-NEXT: 1 Precomp OBJ dependencies +SUMMARY-NEXT: 8 Input type records +SUMMARY-NEXT: 232 Input type records bytes SUMMARY-NEXT: 3 Merged TPI records SUMMARY-NEXT: 2 Merged IPI records SUMMARY-NEXT: 1 Output PDB strings From fe1f0a1a19011154c73b9f6ab764c39dac9b4e79 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Fri, 2 Oct 2020 09:53:43 -0400 Subject: [PATCH 374/544] [LLD] Fix /time formatting for very long runs. NFC. --- lld/Common/Timer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/Common/Timer.cpp b/lld/Common/Timer.cpp index ea221fd86f3e5..16c518e4bf841 100644 --- a/lld/Common/Timer.cpp +++ b/lld/Common/Timer.cpp @@ -45,7 +45,7 @@ void Timer::print() { if (child->total > 0) child->print(1, totalDuration); - message(std::string(49, '-')); + message(std::string(50, '-')); root().print(0, root().millis(), false); } @@ -62,7 +62,7 @@ void Timer::print(int depth, double totalDuration, bool recurse) const { SmallString<32> str; llvm::raw_svector_ostream stream(str); std::string s = std::string(depth * 2, ' ') + name + std::string(":"); - stream << format("%-30s%5d ms (%5.1f%%)", s.c_str(), (int)millis(), p); + stream << format("%-30s%7d ms (%5.1f%%)", s.c_str(), (int)millis(), p); message(str); From fa59135bf1b4ab424f1f13d335f2b8bd666bc9a5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 14:55:53 +0100 Subject: [PATCH 375/544] [Analysis] Drop local maxAPInt/minAPInt helpers. NFCI. Use standard APIntOps::smax/smin helpers instead. --- llvm/lib/Analysis/DependenceAnalysis.cpp | 61 ++++++++++-------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 9b108e703e513..374085dadf667 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1459,19 +1459,6 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { return Q; } - -static -APInt maxAPInt(APInt A, APInt B) { - return A.sgt(B) ? A : B; -} - - -static -APInt minAPInt(APInt A, APInt B) { - return A.slt(B) ? A : B; -} - - // exactSIVtest - // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], // where i is an induction variable, c1 and c2 are loop invariant, and a1 @@ -1542,18 +1529,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(BM/G, LM-X) and test(-BM/G, X-UM) APInt TMUL = BM.sdiv(G); if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (UMvalid) { - TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(UM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (UMvalid) { - TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(UM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1561,18 +1548,18 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(AM/G, LM-Y) and test(-AM/G, Y-UM) TMUL = AM.sdiv(G); if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (UMvalid) { - TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(UM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (UMvalid) { - TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(UM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1591,11 +1578,11 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); TMUL = AM - BM; if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(X - Y + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { @@ -1608,20 +1595,20 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, TL = SaveTL; LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(X - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } TMUL = BM - AM; if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(Y - X, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { @@ -1634,11 +1621,11 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, TL = SaveTL; LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); } else { - TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(Y - X + 1, TMUL)); LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); } if (TL.sle(TU)) { @@ -1950,18 +1937,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(BM/G, LM-X) and test(-BM/G, X-UM) APInt TMUL = BM.sdiv(G); if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (SrcUMvalid) { - TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(SrcUM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (SrcUMvalid) { - TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(SrcUM - X, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } @@ -1969,18 +1956,18 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, // test(AM/G, LM-Y) and test(-AM/G, Y-UM) TMUL = AM.sdiv(G); if (TMUL.sgt(0)) { - TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); if (DstUMvalid) { - TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(DstUM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); } } else { - TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); + TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); if (DstUMvalid) { - TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); + TL = APIntOps::smax(TL, ceilingOfQuotient(DstUM - Y, TMUL)); LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); } } From 0f0cbcc4b166f32603371fb1d62ef3816cf8425f Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Fri, 2 Oct 2020 16:01:25 +0200 Subject: [PATCH 376/544] [clangd] Extend the rename API. several changes: - return a structure result in rename API; - prepareRename now returns more information (main-file occurrences); - remove the duplicated detecting-touch-identifier code in prepareRename (which is implemented in rename API); Differential Revision: https://reviews.llvm.org/D88634 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 19 +++--- clang-tools-extra/clangd/ClangdServer.cpp | 55 +++++++--------- clang-tools-extra/clangd/ClangdServer.h | 7 ++- clang-tools-extra/clangd/SourceCode.h | 2 + clang-tools-extra/clangd/refactor/Rename.cpp | 52 ++++++++------- clang-tools-extra/clangd/refactor/Rename.h | 12 +++- clang-tools-extra/clangd/test/rename.test | 9 ++- .../clangd/unittests/RenameTests.cpp | 63 +++++++++++++++---- .../clangd/unittests/SyncAPI.cpp | 16 +++-- clang-tools-extra/clangd/unittests/SyncAPI.h | 10 ++- 10 files changed, 159 insertions(+), 86 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index c2915aeada4f8..34d5a305494c3 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -793,8 +793,13 @@ void ClangdLSPServer::onWorkspaceSymbol( void ClangdLSPServer::onPrepareRename(const TextDocumentPositionParams &Params, Callback> Reply) { - Server->prepareRename(Params.textDocument.uri.file(), Params.position, - Opts.Rename, std::move(Reply)); + Server->prepareRename( + Params.textDocument.uri.file(), Params.position, Opts.Rename, + [Reply = std::move(Reply)](llvm::Expected Result) mutable { + if (!Result) + return Reply(Result.takeError()); + return Reply(std::move(Result->Target)); + }); } void ClangdLSPServer::onRename(const RenameParams &Params, @@ -806,14 +811,14 @@ void ClangdLSPServer::onRename(const RenameParams &Params, Server->rename( File, Params.position, Params.newName, Opts.Rename, [File, Params, Reply = std::move(Reply), - this](llvm::Expected Edits) mutable { - if (!Edits) - return Reply(Edits.takeError()); - if (auto Err = validateEdits(DraftMgr, *Edits)) + this](llvm::Expected R) mutable { + if (!R) + return Reply(R.takeError()); + if (auto Err = validateEdits(DraftMgr, R->GlobalChanges)) return Reply(std::move(Err)); WorkspaceEdit Result; Result.changes.emplace(); - for (const auto &Rep : *Edits) { + for (const auto &Rep : R->GlobalChanges) { (*Result.changes)[URI::createFile(Rep.first()).toString()] = Rep.second.asTextEdits(); } diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 8c73b6a7d0632..0840155fc8f96 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -400,46 +400,35 @@ void ClangdServer::formatOnType(PathRef File, llvm::StringRef Code, void ClangdServer::prepareRename(PathRef File, Position Pos, const RenameOptions &RenameOpts, - Callback> CB) { + Callback CB) { auto Action = [Pos, File = File.str(), CB = std::move(CB), RenameOpts, this](llvm::Expected InpAST) mutable { if (!InpAST) return CB(InpAST.takeError()); - auto &AST = InpAST->AST; - const auto &SM = AST.getSourceManager(); - auto Loc = sourceLocationInMainFile(SM, Pos); - if (!Loc) - return CB(Loc.takeError()); - const auto *TouchingIdentifier = - spelledIdentifierTouching(*Loc, AST.getTokens()); - if (!TouchingIdentifier) - return CB(llvm::None); // no rename on non-identifiers. - - auto Range = halfOpenToRange( - SM, CharSourceRange::getCharRange(TouchingIdentifier->location(), - TouchingIdentifier->endLocation())); - - if (RenameOpts.AllowCrossFile) - // FIXME: we now assume cross-file rename always succeeds, revisit this. - return CB(Range); - - // Performing the local rename isn't substantially more expensive than - // doing an AST-based check, so we just rename and throw away the results. - auto Changes = clangd::rename({Pos, "dummy", AST, File, Index, RenameOpts, - /*GetDirtyBuffer=*/nullptr}); - if (!Changes) { + // prepareRename is latency-sensitive: + // - for single-file rename, performing rename isn't substantially more + // expensive than doing an AST-based check (the index is used to see if + // the rename is complete); + // - for cross-file rename, we deliberately pass a nullptr index to save + // the cost, thus the result may be incomplete as it only contains + // main-file occurrences; + auto Results = clangd::rename({Pos, /*NewName*/ "", InpAST->AST, File, + RenameOpts.AllowCrossFile ? nullptr : Index, + RenameOpts}); + if (!Results) { // LSP says to return null on failure, but that will result in a generic // failure message. If we send an LSP error response, clients can surface // the message to users (VSCode does). - return CB(Changes.takeError()); + return CB(Results.takeError()); } - return CB(Range); + return CB(*Results); }; WorkScheduler.runWithAST("PrepareRename", File, std::move(Action)); } void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, - const RenameOptions &Opts, Callback CB) { + const RenameOptions &Opts, + Callback CB) { // A snapshot of all file dirty buffers. llvm::StringMap Snapshot = WorkScheduler.getAllFileContents(); auto Action = [File = File.str(), NewName = NewName.str(), Pos, Opts, @@ -457,24 +446,24 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, return llvm::None; return It->second; }; - auto Edits = clangd::rename( + auto R = clangd::rename( {Pos, NewName, InpAST->AST, File, Index, Opts, GetDirtyBuffer}); - if (!Edits) - return CB(Edits.takeError()); + if (!R) + return CB(R.takeError()); if (Opts.WantFormat) { auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS); llvm::Error Err = llvm::Error::success(); - for (auto &E : *Edits) + for (auto &E : R->GlobalChanges) Err = llvm::joinErrors(reformatEdit(E.getValue(), Style), std::move(Err)); if (Err) return CB(std::move(Err)); } - RenameFiles.record(Edits->size()); - return CB(std::move(*Edits)); + RenameFiles.record(R->GlobalChanges.size()); + return CB(*R); }; WorkScheduler.runWithAST("Rename", File, std::move(Action)); } diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index d801d3cd4353c..a9d46fa5278fe 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -273,9 +273,12 @@ class ClangdServer { StringRef TriggerText, Callback> CB); /// Test the validity of a rename operation. + /// + /// The returned result describes edits in the main-file only (all + /// occurrences of the renamed symbol are simply deleted. void prepareRename(PathRef File, Position Pos, const RenameOptions &RenameOpts, - Callback> CB); + Callback CB); /// Rename all occurrences of the symbol at the \p Pos in \p File to /// \p NewName. @@ -283,7 +286,7 @@ class ClangdServer { /// embedders could use this method to get all occurrences of the symbol (e.g. /// highlighting them in prepare stage). void rename(PathRef File, Position Pos, llvm::StringRef NewName, - const RenameOptions &Opts, Callback CB); + const RenameOptions &Opts, Callback CB); struct TweakRef { std::string ID; /// ID to pass for applyTweak. diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index eb63a191ab909..128f985a52664 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -181,6 +181,8 @@ struct Edit { tooling::Replacements Replacements; std::string InitialCode; + Edit() = default; + Edit(llvm::StringRef Code, tooling::Replacements Reps) : Replacements(std::move(Reps)), InitialCode(Code) {} diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index 2744caa586485..c48bc2856cb7d 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -182,8 +182,6 @@ llvm::Optional renameable(const NamedDecl &RenameDecl, } assert(CrossFile); - if (!Index) - return ReasonToReject::NoIndexProvided; // FIXME: Renaming virtual methods requires to rename all overridens in // subclasses, our index doesn't have this information. @@ -427,7 +425,7 @@ void findNearMiss( } // namespace -llvm::Expected rename(const RenameInputs &RInputs) { +llvm::Expected rename(const RenameInputs &RInputs) { trace::Span Tracer("Rename flow"); const auto &Opts = RInputs.Opts; ParsedAST &AST = RInputs.AST; @@ -456,9 +454,13 @@ llvm::Expected rename(const RenameInputs &RInputs) { return Loc.takeError(); const syntax::Token *IdentifierToken = spelledIdentifierTouching(*Loc, AST.getTokens()); + // Renames should only triggered on identifiers. if (!IdentifierToken) return makeError(ReasonToReject::NoSymbolFound); + Range CurrentIdentifier = halfOpenToRange( + SM, CharSourceRange::getCharRange(IdentifierToken->location(), + IdentifierToken->endLocation())); // FIXME: Renaming macros is not supported yet, the macro-handling code should // be moved to rename tooling library. if (locateMacroAt(*IdentifierToken, AST.getPreprocessor())) @@ -489,32 +491,40 @@ llvm::Expected rename(const RenameInputs &RInputs) { auto MainFileRenameEdit = renameWithinFile(AST, RenameDecl, RInputs.NewName); if (!MainFileRenameEdit) return MainFileRenameEdit.takeError(); + RenameResult Result; + Result.Target = CurrentIdentifier; + Edit MainFileEdits = Edit(MainFileCode, std::move(*MainFileRenameEdit)); + llvm::for_each(MainFileEdits.asTextEdits(), [&Result](const TextEdit &TE) { + Result.LocalChanges.push_back(TE.range); + }); // return the main file edit if this is a within-file rename or the symbol // being renamed is function local. if (!Opts.AllowCrossFile || RenameDecl.getParentFunctionOrMethod()) { - return FileEdits( - {std::make_pair(RInputs.MainFilePath, - Edit{MainFileCode, std::move(*MainFileRenameEdit)})}); + Result.GlobalChanges = FileEdits( + {std::make_pair(RInputs.MainFilePath, std::move(MainFileEdits))}); + return Result; } - FileEdits Results; - // Renameable safely guards us that at this point we are renaming a local - // symbol if we don't have index. - if (RInputs.Index) { - auto OtherFilesEdits = renameOutsideFile( - RenameDecl, RInputs.MainFilePath, RInputs.NewName, *RInputs.Index, - Opts.LimitFiles == 0 ? std::numeric_limits::max() - : Opts.LimitFiles, - GetFileContent); - if (!OtherFilesEdits) - return OtherFilesEdits.takeError(); - Results = std::move(*OtherFilesEdits); + // If the index is nullptr, we don't know the completeness of the result, so + // we don't populate the field GlobalChanges. + if (!RInputs.Index) { + assert(Result.GlobalChanges.empty() && Opts.AllowCrossFile); + return Result; } + + auto OtherFilesEdits = renameOutsideFile( + RenameDecl, RInputs.MainFilePath, RInputs.NewName, *RInputs.Index, + Opts.LimitFiles == 0 ? std::numeric_limits::max() + : Opts.LimitFiles, + GetFileContent); + if (!OtherFilesEdits) + return OtherFilesEdits.takeError(); + Result.GlobalChanges = *OtherFilesEdits; // Attach the rename edits for the main file. - Results.try_emplace(RInputs.MainFilePath, MainFileCode, - std::move(*MainFileRenameEdit)); - return Results; + Result.GlobalChanges.try_emplace(RInputs.MainFilePath, + std::move(MainFileEdits)); + return Result; } llvm::Expected buildRenameEdit(llvm::StringRef AbsFilePath, diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h index e27951605ebca..ff23a67cfc461 100644 --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -55,10 +55,20 @@ struct RenameInputs { DirtyBufferGetter GetDirtyBuffer = nullptr; }; +struct RenameResult { + // The range of the symbol that the user can attempt to rename. + Range Target; + // Rename occurrences for the current main file. + std::vector LocalChanges; + // Complete edits for the rename, including LocalChanges. + // If the full set of changes is unknown, this field is empty. + FileEdits GlobalChanges; +}; + /// Renames all occurrences of the symbol. The result edits are unformatted. /// If AllowCrossFile is false, returns an error if rename a symbol that's used /// in another file (per the index). -llvm::Expected rename(const RenameInputs &RInputs); +llvm::Expected rename(const RenameInputs &RInputs); /// Generates rename edits that replaces all given occurrences with the /// NewName. diff --git a/clang-tools-extra/clangd/test/rename.test b/clang-tools-extra/clangd/test/rename.test index 214efb2b5e398..527b4263443a7 100644 --- a/clang-tools-extra/clangd/test/rename.test +++ b/clang-tools-extra/clangd/test/rename.test @@ -21,9 +21,12 @@ # CHECK-NEXT: } --- {"jsonrpc":"2.0","id":2,"method":"textDocument/prepareRename","params":{"textDocument":{"uri":"test:///foo.cpp"},"position":{"line":0,"character":2}}} -# CHECK: "id": 2, -# CHECK-NEXT: "jsonrpc": "2.0", -# CHECK-NEXT: "result": null +# CHECK: "error": { +# CHECK-NEXT: "code": -32001, +# CHECK-NEXT: "message": "Cannot rename symbol: there is no symbol at the given location" +# CHECK-NEXT: }, +# CHECK-NEXT: "id": 2, +# CHECK-NEXT: "jsonrpc": "2.0" --- {"jsonrpc":"2.0","id":4,"method":"textDocument/rename","params":{"textDocument":{"uri":"test:///foo.cpp"},"position":{"line":0,"character":2},"newName":"bar"}} # CHECK: "error": { diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 08af7aad0099f..cc2454e9d04e8 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -502,9 +502,10 @@ TEST(RenameTest, WithinFileRename) { auto RenameResult = rename({RenamePos, NewName, AST, testPath(TU.Filename)}); ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError(); - ASSERT_EQ(1u, RenameResult->size()); - EXPECT_EQ(applyEdits(std::move(*RenameResult)).front().second, - expectedResult(Code, NewName)); + ASSERT_EQ(1u, RenameResult->GlobalChanges.size()); + EXPECT_EQ( + applyEdits(std::move(RenameResult->GlobalChanges)).front().second, + expectedResult(Code, NewName)); } } } @@ -653,8 +654,8 @@ TEST(RenameTest, Renameable) { } else { EXPECT_TRUE(bool(Results)) << "rename returned an error: " << llvm::toString(Results.takeError()); - ASSERT_EQ(1u, Results->size()); - EXPECT_EQ(applyEdits(std::move(*Results)).front().second, + ASSERT_EQ(1u, Results->GlobalChanges.size()); + EXPECT_EQ(applyEdits(std::move(Results->GlobalChanges)).front().second, expectedResult(T, NewName)); } } @@ -683,8 +684,8 @@ TEST(RenameTest, MainFileReferencesOnly) { auto RenameResult = rename({Code.point(), NewName, AST, testPath(TU.Filename)}); ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError() << Code.point(); - ASSERT_EQ(1u, RenameResult->size()); - EXPECT_EQ(applyEdits(std::move(*RenameResult)).front().second, + ASSERT_EQ(1u, RenameResult->GlobalChanges.size()); + EXPECT_EQ(applyEdits(std::move(RenameResult->GlobalChanges)).front().second, expectedResult(Code, NewName)); } @@ -703,6 +704,44 @@ TEST(RenameTest, ProtobufSymbolIsExcluded) { testing::HasSubstr("not a supported kind")); } +TEST(RenameTest, PrepareRename) { + Annotations FooH("void func();"); + Annotations FooCC(R"cpp( + #include "foo.h" + void [[fu^nc]]() {} + )cpp"); + std::string FooHPath = testPath("foo.h"); + std::string FooCCPath = testPath("foo.cc"); + MockFS FS; + FS.Files[FooHPath] = std::string(FooH.code()); + FS.Files[FooCCPath] = std::string(FooCC.code()); + + auto ServerOpts = ClangdServer::optsForTest(); + ServerOpts.BuildDynamicSymbolIndex = true; + + MockCompilationDatabase CDB; + ClangdServer Server(CDB, FS, ServerOpts); + runAddDocument(Server, FooHPath, FooH.code()); + runAddDocument(Server, FooCCPath, FooCC.code()); + + auto Results = + runPrepareRename(Server, FooCCPath, FooCC.point(), {/*CrossFile=*/true}); + // verify that for multi-file rename, we only return main-file occurrences. + ASSERT_TRUE(bool(Results)) << Results.takeError(); + // We don't know the result is complete in prepareRename (passing a nullptr + // index internally), so GlobalChanges should be empty. + EXPECT_TRUE(Results->GlobalChanges.empty()); + EXPECT_THAT(FooCC.ranges(), + testing::UnorderedElementsAreArray(Results->LocalChanges)); + + // single-file rename on global symbols, we should report an error. + Results = + runPrepareRename(Server, FooCCPath, FooCC.point(), {/*CrossFile=*/false}); + EXPECT_FALSE(Results); + EXPECT_THAT(llvm::toString(Results.takeError()), + testing::HasSubstr("is used outside")); +} + TEST(CrossFileRenameTests, DirtyBuffer) { Annotations FooCode("class [[Foo]] {};"); std::string FooPath = testPath("foo.cc"); @@ -741,7 +780,7 @@ TEST(CrossFileRenameTests, DirtyBuffer) { GetDirtyBuffer}); ASSERT_TRUE(bool(Results)) << Results.takeError(); EXPECT_THAT( - applyEdits(std::move(*Results)), + applyEdits(std::move(Results->GlobalChanges)), UnorderedElementsAre( Pair(Eq(FooPath), Eq(expectedResult(FooDirtyBuffer, NewName))), Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName))))); @@ -762,7 +801,7 @@ TEST(CrossFileRenameTests, DirtyBuffer) { GetDirtyBuffer}); ASSERT_TRUE(bool(Results)) << Results.takeError(); EXPECT_THAT( - applyEdits(std::move(*Results)), + applyEdits(std::move(Results->GlobalChanges)), UnorderedElementsAre( Pair(Eq(BarPath), Eq(expectedResult(BarCode, NewName))), Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName))))); @@ -847,7 +886,7 @@ TEST(CrossFileRenameTests, DeduplicateRefsFromIndex) { {/*CrossFile=*/true}}); ASSERT_TRUE(bool(Results)) << Results.takeError(); EXPECT_THAT( - applyEdits(std::move(*Results)), + applyEdits(std::move(Results->GlobalChanges)), UnorderedElementsAre( Pair(Eq(BarPath), Eq(expectedResult(BarCode, NewName))), Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName))))); @@ -1047,7 +1086,7 @@ TEST(CrossFileRenameTests, WithUpToDateIndex) { Server, FooHPath, RenamePos, NewName, {/*CrossFile=*/true})); EXPECT_THAT(Tracer.takeMetric("rename_files"), ElementsAre(2)); EXPECT_THAT( - applyEdits(std::move(FileEditsList)), + applyEdits(std::move(FileEditsList.GlobalChanges)), UnorderedElementsAre( Pair(Eq(FooHPath), Eq(expectedResult(T.FooH, NewName))), Pair(Eq(FooCCPath), Eq(expectedResult(T.FooCC, NewName))))); @@ -1066,7 +1105,7 @@ TEST(CrossFileRenameTests, CrossFileOnLocalSymbol) { auto Results = rename({Code.point(), NewName, AST, Path}); ASSERT_TRUE(bool(Results)) << Results.takeError(); EXPECT_THAT( - applyEdits(std::move(*Results)), + applyEdits(std::move(Results->GlobalChanges)), UnorderedElementsAre(Pair(Eq(Path), Eq(expectedResult(Code, NewName))))); } diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.cpp b/clang-tools-extra/clangd/unittests/SyncAPI.cpp index fb810f40c79f4..6d6879ab62dbf 100644 --- a/clang-tools-extra/clangd/unittests/SyncAPI.cpp +++ b/clang-tools-extra/clangd/unittests/SyncAPI.cpp @@ -97,14 +97,22 @@ runFindDocumentHighlights(ClangdServer &Server, PathRef File, Position Pos) { return std::move(*Result); } -llvm::Expected runRename(ClangdServer &Server, PathRef File, - Position Pos, llvm::StringRef NewName, - const RenameOptions &RenameOpts) { - llvm::Optional> Result; +llvm::Expected runRename(ClangdServer &Server, PathRef File, + Position Pos, llvm::StringRef NewName, + const RenameOptions &RenameOpts) { + llvm::Optional> Result; Server.rename(File, Pos, NewName, RenameOpts, capture(Result)); return std::move(*Result); } +llvm::Expected runPrepareRename(ClangdServer &Server, + PathRef File, Position Pos, + const RenameOptions &RenameOpts) { + llvm::Optional> Result; + Server.prepareRename(File, Pos, RenameOpts, capture(Result)); + return std::move(*Result); +} + llvm::Expected runFormatFile(ClangdServer &Server, PathRef File, StringRef Code) { llvm::Optional> Result; diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.h b/clang-tools-extra/clangd/unittests/SyncAPI.h index 944717db41510..aa641fee91af4 100644 --- a/clang-tools-extra/clangd/unittests/SyncAPI.h +++ b/clang-tools-extra/clangd/unittests/SyncAPI.h @@ -40,9 +40,13 @@ runLocateSymbolAt(ClangdServer &Server, PathRef File, Position Pos); llvm::Expected> runFindDocumentHighlights(ClangdServer &Server, PathRef File, Position Pos); -llvm::Expected runRename(ClangdServer &Server, PathRef File, - Position Pos, StringRef NewName, - const clangd::RenameOptions &RenameOpts); +llvm::Expected runRename(ClangdServer &Server, PathRef File, + Position Pos, StringRef NewName, + const clangd::RenameOptions &RenameOpts); + +llvm::Expected +runPrepareRename(ClangdServer &Server, PathRef File, Position Pos, + const clangd::RenameOptions &RenameOpts); llvm::Expected runFormatFile(ClangdServer &Server, PathRef File, StringRef Code); From 36501b180a4f0194f0cfb4374d096ae660182827 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sat, 26 Sep 2020 22:28:04 -0400 Subject: [PATCH 377/544] Emit predefined macro for wavefront size for amdgcn Also fix the issue of multiple -m[no-]wavefrontsize64 options to make the last one wins. Differential Revision: https://reviews.llvm.org/D88370 --- clang/lib/Basic/Targets/AMDGPU.cpp | 3 ++ clang/lib/Basic/Targets/AMDGPU.h | 3 ++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 11 +----- clang/test/Driver/amdgpu-features.c | 10 ++++- clang/test/Driver/amdgpu-macros.cl | 40 +++++++++++++++++++- clang/test/Driver/hip-macros.hip | 20 ++++++++++ clang/test/Driver/hip-toolchain-features.hip | 19 +++++++--- 7 files changed, 89 insertions(+), 17 deletions(-) create mode 100644 clang/test/Driver/hip-macros.hip diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index da25bf10ca07c..ba9be72af971b 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -316,6 +316,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, HasLegalHalfType = true; HasFloat16 = true; + WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; // Set pointer width and alignment for target address space 0. PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); @@ -388,6 +389,8 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__HAS_FP64__"); if (hasFastFMA()) Builder.defineMacro("FP_FAST_FMA"); + + Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); } void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 1f2fc081ae9d2..a0e4c19f4fea3 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -41,6 +41,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { llvm::AMDGPU::GPUKind GPUKind; unsigned GPUFeatures; + unsigned WavefrontSize; /// Target ID is device name followed by optional feature name postfixed /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sram-ecc-. @@ -407,6 +408,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind)); llvm::for_each(Features, [&](const auto &F) { assert(F.front() == '+' || F.front() == '-'); + if (F == "+wavefrontsize64") + WavefrontSize = 64; bool IsOn = F.front() == '+'; StringRef Name = StringRef(F).drop_front(); if (llvm::find(TargetIDFeatures, Name) == TargetIDFeatures.end()) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 61254e3eeaefe..656de9dd9e1e2 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -390,16 +390,9 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, } } - if (Args.getLastArg(options::OPT_mwavefrontsize64)) { - Features.push_back("-wavefrontsize16"); - Features.push_back("-wavefrontsize32"); + if (Args.hasFlag(options::OPT_mwavefrontsize64, + options::OPT_mno_wavefrontsize64, false)) Features.push_back("+wavefrontsize64"); - } - if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) { - Features.push_back("-wavefrontsize16"); - Features.push_back("+wavefrontsize32"); - Features.push_back("-wavefrontsize64"); - } handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index b97a98b90f7d1..71fd63715e003 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -25,10 +25,16 @@ // NO-SRAM-ECC: "-target-feature" "-sram-ecc" // RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64" +// RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx1010 -mno-wavefrontsize64 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// WAVE64: "-target-feature" "+wavefrontsize64" +// WAVE64-NOT: {{".*wavefrontsize16"}} +// WAVE64-NOT: {{".*wavefrontsize32"}} // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s -// NO-WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "+wavefrontsize32" "-target-feature" "-wavefrontsize64" +// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s +// NO-WAVE64-NOT: {{".*wavefrontsize16"}} +// NO-WAVE64-NOT: {{".*wavefrontsize32"}} +// NO-WAVE64-NOT: {{".*wavefrontsize64"}} // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mcumode %s 2>&1 | FileCheck --check-prefix=CUMODE %s // CUMODE: "-target-feature" "+cumode" diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 1c2a62d234b1e..71f96d4981d74 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -346,4 +346,42 @@ // GFX1011-DAG: #define __amdgcn_processor__ "gfx1011" // GFX1012-DAG: #define __amdgcn_processor__ "gfx1012" // GFX1030-DAG: #define __amdgcn_processor__ "gfx1030" -// GFX1031-DAG: #define __amdgcn_processor__ "gfx1031" \ No newline at end of file +// GFX1031-DAG: #define __amdgcn_processor__ "gfx1031" + +// GFX600-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX601-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX700-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX701-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX702-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX703-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX704-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX801-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX802-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX803-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX810-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX900-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX902-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX904-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX906-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX908-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX909-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX1010-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1011-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1012-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1030-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1031-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 + +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mno-wavefrontsize64 \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip new file mode 100644 index 0000000000000..00dcca17a08aa --- /dev/null +++ b/clang/test/Driver/hip-macros.hip @@ -0,0 +1,20 @@ +// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s +// RUN: %clang -E -dM --offload-arch=gfx906 -mno-wavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 diff --git a/clang/test/Driver/hip-toolchain-features.hip b/clang/test/Driver/hip-toolchain-features.hip index 838bc150b634f..38b94f9d0d734 100644 --- a/clang/test/Driver/hip-toolchain-features.hip +++ b/clang/test/Driver/hip-toolchain-features.hip @@ -37,8 +37,17 @@ // RUN: -mcumode -mcumode -mno-cumode -mwavefrontsize64 -mcumode \ // RUN: -mwavefrontsize64 -mno-wavefrontsize64 2>&1 \ // RUN: | FileCheck %s -check-prefix=DUP -// DUP: {{.*}}clang{{.*}} "-target-feature" "-wavefrontsize16" -// DUP-SAME: "-target-feature" "+wavefrontsize32" -// DUP-SAME: "-target-feature" "-wavefrontsize64" -// DUP-SAME: "-target-feature" "+cumode" -// DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=-wavefrontsize16,+wavefrontsize32,-wavefrontsize64,+cumode" +// DUP: {{.*}}clang{{.*}} "-target-feature" "+cumode" +// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize16" +// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize32" +// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64" +// DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode" + +// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ +// RUN: --cuda-gpu-arch=gfx1010 %s \ +// RUN: -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \ +// RUN: | FileCheck %s -check-prefix=WAVE64 +// WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64" +// WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize16" +// WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize32" +// WAVE64: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+wavefrontsize64" From 0c1bb4f8851b87224f33abafbaae805942009a7f Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Tue, 22 Sep 2020 13:58:54 -0400 Subject: [PATCH 378/544] [TableGen] New backend to print detailed records. Pertinent lints are fixed. --- llvm/docs/TableGen/BackGuide.rst | 58 ++++- llvm/include/llvm/Support/SourceMgr.h | 5 + llvm/include/llvm/TableGen/Record.h | 18 +- llvm/lib/Support/SourceMgr.cpp | 26 ++- llvm/lib/TableGen/CMakeLists.txt | 1 + llvm/lib/TableGen/DetailedRecordsBackend.cpp | 204 ++++++++++++++++++ llvm/lib/TableGen/Main.cpp | 2 + llvm/lib/TableGen/Record.cpp | 21 +- llvm/lib/TableGen/TableGenBackendSkeleton.cpp | 6 +- llvm/utils/TableGen/TableGen.cpp | 6 + 10 files changed, 331 insertions(+), 16 deletions(-) create mode 100644 llvm/lib/TableGen/DetailedRecordsBackend.cpp diff --git a/llvm/docs/TableGen/BackGuide.rst b/llvm/docs/TableGen/BackGuide.rst index 3d7244053055a..4ee5453f72927 100644 --- a/llvm/docs/TableGen/BackGuide.rst +++ b/llvm/docs/TableGen/BackGuide.rst @@ -690,11 +690,9 @@ Instances of the following classes can be printed using the ``<<`` operator: ``RecordVal``, and ``Init``. -A constant and two helper functions are provided for producing the output -file. The constant ``MAX_LINE_LEN`` specifies the maximum length of output -lines. The helper function ``printLine`` prints a horizontal line comment. -The helper function ``emitSourceFileHeader`` prints the header comment that -should be included at the top of every output file. +The helper function ``emitSourceFileHeader()`` prints the header comment +that should be included at the top of every output file. A call to it is +included in the skeleton backend file ``TableGenBackendSkeleton.cpp``. Printing Error Messages ======================= @@ -780,9 +778,53 @@ Classes are shown with their template arguments, parent classes (following fields. Note that anonymous records are named ``anonymous_0``, ``anonymous_1``, etc. - - The ``PrintDetailedRecords`` Backend ------------------------------------ -[to come] +The TableGen command option ``--print-detailed-records`` invokes a backend +that prints all the global variables, classes, and records defined in the +source files. The output looks like this. + +.. code-block:: text + + DETAILED RECORDS for file llvm-project\llvm\lib\target\arc\arc.td + + -------------------- Global Variables (5) -------------------- + + AMDGPUBufferIntrinsics = [int_amdgcn_buffer_load_format, ... + AMDGPUImageDimAtomicIntrinsics = [int_amdgcn_image_atomic_swap_1d, ... + ... + -------------------- Classes (758) -------------------- + + AMDGPUBufferLoad |IntrinsicsAMDGPU.td:879| + Template args: + LLVMType AMDGPUBufferLoad:data_ty = llvm_any_ty |IntrinsicsAMDGPU.td:879| + Superclasses: (SDPatternOperator) Intrinsic AMDGPURsrcIntrinsic + Fields: + list Properties = [SDNPMemOperand] |Intrinsics.td:348| + string LLVMName = "" |Intrinsics.td:343| + ... + -------------------- Records (12303) -------------------- + + AMDGPUSample_lz_o |IntrinsicsAMDGPU.td:560| + Defm sequence: |IntrinsicsAMDGPU.td:584| |IntrinsicsAMDGPU.td:566| + Superclasses: AMDGPUSampleVariant + Fields: + string UpperCaseMod = "_LZ_O" |IntrinsicsAMDGPU.td:542| + string LowerCaseMod = "_lz_o" |IntrinsicsAMDGPU.td:543| + ... + +* Global variables defined with outer ``defvar`` statements are shown with + their values. + +* The classes are shown with their source location, template arguments, + superclasses, and fields. + +* The records are shown with their source location, ``defm`` sequence, + superclasses, and fields. + +Superclasses are shown in the order processed, with indirect superclasses in +parentheses. Each field is shown with its value and the source location at +which it was set. +The ``defm`` sequence gives the locations of the ``defm`` statements that +were involved in generating the record, in the order they were invoked. \ No newline at end of file diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h index a0bd3ca2e0c19..28716b42f4ab3 100644 --- a/llvm/include/llvm/Support/SourceMgr.h +++ b/llvm/include/llvm/Support/SourceMgr.h @@ -172,6 +172,11 @@ class SourceMgr { std::pair getLineAndColumn(SMLoc Loc, unsigned BufferID = 0) const; + /// Get a string with the \p SMLoc filename and line number + /// formatted in the standard style. + std::string getFormattedLocationNoOffset(SMLoc Loc, + bool IncludePath = false) const; + /// Given a line and column number in a mapped buffer, turn it into an SMLoc. /// This will return a null SMLoc if the line/column location is invalid. SMLoc FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 1c46fce3f9941..2a02093ba5317 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -1518,7 +1518,10 @@ class Record { return SuperClasses; } - /// Append the direct super classes of this record to Classes. + /// Determine whether this record has the specified direct superclass. + bool hasDirectSuperClass(const Record *SuperClass) const; + + /// Append the direct superclasses of this record to Classes. void getDirectSuperClasses(SmallVectorImpl &Classes) const; bool isTemplateArg(Init *Name) const { @@ -1710,19 +1713,27 @@ class RecordKeeper { friend class RecordRecTy; using RecordMap = std::map, std::less<>>; + using GlobalMap = std::map>; + std::string InputFilename; RecordMap Classes, Defs; FoldingSet RecordTypePool; std::map> ExtraGlobals; unsigned AnonCounter = 0; public: + /// Get the main TableGen input file's name. + const std::string getInputFilename() const { return InputFilename; } + /// Get the map of classes. const RecordMap &getClasses() const { return Classes; } /// Get the map of records (defs). const RecordMap &getDefs() const { return Defs; } + /// Get the map of global variables. + const GlobalMap &getGlobals() const { return ExtraGlobals; } + /// Get the class with the specified name. Record *getClass(StringRef Name) const { auto I = Classes.find(Name); @@ -1743,6 +1754,10 @@ class RecordKeeper { return It == ExtraGlobals.end() ? nullptr : It->second; } + void saveInputFilename(std::string Filename) { + InputFilename = Filename; + } + void addClass(std::unique_ptr R) { bool Ins = Classes.insert(std::make_pair(std::string(R->getName()), std::move(R))).second; @@ -2017,6 +2032,7 @@ class HasReferenceResolver final : public Resolver { Init *resolve(Init *VarName) override; }; +void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS); void EmitJSON(RecordKeeper &RK, raw_ostream &OS); } // end namespace llvm diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index 9cc69732a9647..e50cf5b4a8344 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -180,7 +180,7 @@ std::pair SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { if (!BufferID) BufferID = FindBufferContainingLoc(Loc); - assert(BufferID && "Invalid Location!"); + assert(BufferID && "Invalid location!"); auto &SB = getBufferInfo(BufferID); const char *Ptr = Loc.getPointer(); @@ -193,6 +193,30 @@ SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs); } +// FIXME: Note that the formatting of source locations is spread between +// multiple functions, some in SourceMgr and some in SMDiagnostic. A better +// solution would be a general-purpose source location formatter +// in one of those two classes, or possibly in SMLoc. + +/// Get a string with the source location formatted in the standard +/// style, but without the line offset. If \p IncludePath is true, the path +/// is included. If false, only the file name and extension are included. +std::string SourceMgr::getFormattedLocationNoOffset(SMLoc Loc, + bool IncludePath) const { + auto BufferID = FindBufferContainingLoc(Loc); + assert(BufferID && "Invalid location!"); + auto FileSpec = getBufferInfo(BufferID).Buffer->getBufferIdentifier(); + + if (IncludePath) { + return FileSpec.str() + ":" + std::to_string(FindLineNumber(Loc, BufferID)); + } else { + auto I = FileSpec.find_last_of("/\\"); + I = (I == FileSpec.size()) ? 0 : (I + 1); + return FileSpec.substr(I).str() + ":" + + std::to_string(FindLineNumber(Loc, BufferID)); + } +} + /// Given a line and column number in a mapped buffer, turn it into an SMLoc. /// This will return a null SMLoc if the line/column location is invalid. SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt index 0a0a56c6285c8..c8ccbe85e36c2 100644 --- a/llvm/lib/TableGen/CMakeLists.txt +++ b/llvm/lib/TableGen/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMTableGen + DetailedRecordsBackend.cpp Error.cpp JSONBackend.cpp Main.cpp diff --git a/llvm/lib/TableGen/DetailedRecordsBackend.cpp b/llvm/lib/TableGen/DetailedRecordsBackend.cpp new file mode 100644 index 0000000000000..1b6b675081edf --- /dev/null +++ b/llvm/lib/TableGen/DetailedRecordsBackend.cpp @@ -0,0 +1,204 @@ +//===- DetailedRecordBackend.cpp - Detailed Records Report -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This Tablegen backend prints a report that includes all the global +// variables, classes, and records in complete detail. It includes more +// detail than the default TableGen printer backend. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include +#include +#include +#include + +#define DEBUG_TYPE "detailed-records-backend" + +#define NL "\n" + +using namespace llvm; + +namespace { + +class DetailedRecordsEmitter { +private: + RecordKeeper &Records; + +public: + DetailedRecordsEmitter(RecordKeeper &RK) : Records(RK) {} + + void run(raw_ostream &OS); + void printReportHeading(raw_ostream &OS); + void printVariables(raw_ostream &OS); + void printClasses(raw_ostream &OS); + void printRecords(raw_ostream &OS); + void printSectionHeading(std::string Title, int Count, raw_ostream &OS); + void printDefms(Record *Rec, raw_ostream &OS); + void printTemplateArgs(Record *Rec, raw_ostream &OS); + void printSuperclasses(Record *Rec, raw_ostream &OS); + void printFields(Record *Rec, raw_ostream &OS); + std::string formatLocation(const SMLoc Loc); +}; // emitter class + +} // anonymous namespace + +// Print the report. +void DetailedRecordsEmitter::run(raw_ostream &OS) { + printReportHeading(OS); + printVariables(OS); + printClasses(OS); + printRecords(OS); +} + +// Print the report heading, including the source file name. +void DetailedRecordsEmitter::printReportHeading(raw_ostream &OS) { + OS << formatv("DETAILED RECORDS for file {0}\n", Records.getInputFilename()); +} + +// Print the global variables. +void DetailedRecordsEmitter::printVariables(raw_ostream &OS) { + const auto GlobalList = Records.getGlobals(); + printSectionHeading("Global Variables", GlobalList.size(), OS); + + OS << NL; + for (const auto &Var : GlobalList) { + OS << Var.first << " = " << Var.second->getAsString() << NL; + } +} + +// Print the classes, including the template arguments, superclasses, +// and fields. +void DetailedRecordsEmitter::printClasses(raw_ostream &OS) { + const auto &ClassList = Records.getClasses(); + printSectionHeading("Classes", ClassList.size(), OS); + + for (const auto &ClassPair : ClassList) { + auto *const Class = ClassPair.second.get(); + OS << formatv("\n{0} |{1}|\n", Class->getNameInitAsString(), + SrcMgr.getFormattedLocationNoOffset(Class->getLoc().front())); + printTemplateArgs(Class, OS); + printSuperclasses(Class, OS); + printFields(Class, OS); + } +} + +// Print the records, including the defm sequences, supercasses, +// and fields. +void DetailedRecordsEmitter::printRecords(raw_ostream &OS) { + const auto &RecordList = Records.getDefs(); + printSectionHeading("Records", RecordList.size(), OS); + + for (const auto &RecPair : RecordList) { + auto *const Rec = RecPair.second.get(); + OS << formatv("\n{0} |{1}|\n", Rec->getNameInitAsString(), + SrcMgr.getFormattedLocationNoOffset(Rec->getLoc().front())); + printDefms(Rec, OS); + printSuperclasses(Rec, OS); + printFields(Rec, OS); + } +} + +// Print a section heading with the name of the section and +// the item count. +void DetailedRecordsEmitter::printSectionHeading(std::string Title, int Count, + raw_ostream &OS) { + OS << formatv("\n{0} {1} ({2}) {0}\n", "--------------------", Title, Count); +} + +// Print the record's defm source locations, if any. Note that they +// are stored in the reverse order of their invocation. +void DetailedRecordsEmitter::printDefms(Record *Rec, raw_ostream &OS) { + const auto &LocList = Rec->getLoc(); + if (LocList.size() < 2) + return; + + OS << " Defm sequence:"; + for (unsigned I = LocList.size() - 1; I >= 1; --I) { + OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(LocList[I])); + } + OS << NL; +} + +// Print the template arguments of a class. +void DetailedRecordsEmitter::printTemplateArgs(Record *Rec, + raw_ostream &OS) { + ArrayRef Args = Rec->getTemplateArgs(); + if (Args.empty()) { + OS << " Template args: (none)\n"; + return; + } + + OS << " Template args:\n"; + for (const Init *ArgName : Args) { + const RecordVal *Value = Rec->getValue(ArgName); + assert(Value && "Template argument value not found."); + OS << " "; + Value->print(OS, false); + OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(Value->getLoc())); + OS << NL; + } +} + +// Print the superclasses of a class or record. Indirect superclasses +// are enclosed in parentheses. +void DetailedRecordsEmitter::printSuperclasses(Record *Rec, raw_ostream &OS) { + ArrayRef> Superclasses = Rec->getSuperClasses(); + if (Superclasses.empty()) { + OS << " Superclasses: (none)\n"; + return; + } + + OS << " Superclasses:"; + for (const auto &SuperclassPair : Superclasses) { + auto *ClassRec = SuperclassPair.first; + if (Rec->hasDirectSuperClass(ClassRec)) + OS << formatv(" {0}", ClassRec->getNameInitAsString()); + else + OS << formatv(" ({0})", ClassRec->getNameInitAsString()); + } + OS << NL; +} + +// Print the fields of a class or record, including their source locations. +void DetailedRecordsEmitter::printFields(Record *Rec, raw_ostream &OS) { + const auto &ValueList = Rec->getValues(); + if (ValueList.empty()) { + OS << " Fields: (none)\n"; + return; + } + + OS << " Fields:\n"; + for (const RecordVal &Value : ValueList) + if (!Rec->isTemplateArg(Value.getNameInit())) { + OS << " "; + Value.print(OS, false); + OS << formatv(" |{0}|\n", + SrcMgr.getFormattedLocationNoOffset(Value.getLoc())); + } +} + +namespace llvm { + +// This function is called by TableGen after parsing the files. + +void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS) { + // Instantiate the emitter class and invoke run(). + DetailedRecordsEmitter(RK).run(OS); +} + +} // namespace llvm diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 77f1b61cf9301..d7c73d2f6f022 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -90,6 +90,8 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) { return reportError(argv0, "Could not open input file '" + InputFilename + "': " + EC.message() + "\n"); + Records.saveInputFilename(InputFilename); + // Tell SrcMgr about this buffer, which is what TGParser will pick up. SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), SMLoc()); diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index ae8fe0316c42c..260cca6b59e59 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -2144,12 +2144,27 @@ void Record::setName(Init *NewName) { // this. See TGParser::ParseDef and TGParser::ParseDefm. } +// NOTE for the next two functions: +// Superclasses are in post-order, so the final one is a direct +// superclass. All of its transitive superclases immediately precede it, +// so we can step through the direct superclasses in reverse order. + +bool Record::hasDirectSuperClass(const Record *Superclass) const { + ArrayRef> SCs = getSuperClasses(); + + for (int I = SCs.size() - 1; I >= 0; --I) { + const Record *SC = SCs[I].first; + if (SC == Superclass) + return true; + I -= SC->getSuperClasses().size(); + } + + return false; +} + void Record::getDirectSuperClasses(SmallVectorImpl &Classes) const { ArrayRef> SCs = getSuperClasses(); - // Superclasses are in post-order, so the final one is a direct - // superclass. All of its transitive superclases immediately precede it, - // so we can step through the direct superclasses in reverse order. while (!SCs.empty()) { Record *SC = SCs.back().first; SCs = SCs.drop_back(1 + SC->getSuperClasses().size()); diff --git a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp index bf1fccdee4045..4ce88e003e653 100644 --- a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp +++ b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp @@ -41,9 +41,9 @@ class SkeletonEmitter { SkeletonEmitter(RecordKeeper &RK) : Records(RK) {} void run(raw_ostream &OS); -}; // End emitter class. +}; // emitter class -} // End anonymous namespace. +} // anonymous namespace void SkeletonEmitter::run(raw_ostream &OS) { emitSourceFileHeader("Skeleton data structures", OS); @@ -61,4 +61,4 @@ void EmitSkeleton(RecordKeeper &RK, raw_ostream &OS) { SkeletonEmitter(RK).run(OS); } -} // End llvm namespace. +} // namespace llvm diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index 5215c30b707f4..65e63ff487f5d 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -21,6 +21,7 @@ using namespace llvm; enum ActionType { PrintRecords, + PrintDetailedRecords, DumpJSON, GenEmitter, GenRegisterInfo, @@ -75,6 +76,8 @@ cl::opt Action( cl::values( clEnumValN(PrintRecords, "print-records", "Print all records to stdout (default)"), + clEnumValN(PrintDetailedRecords, "print-detailed-records", + "Print full details of all records to stdout"), clEnumValN(DumpJSON, "dump-json", "Dump all records as machine-readable JSON"), clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), @@ -152,6 +155,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case PrintRecords: OS << Records; // No argument, dump all contents break; + case PrintDetailedRecords: + EmitDetailedRecords(Records, OS); + break; case DumpJSON: EmitJSON(Records, OS); break; From d9e397208002281322cb72383edff24e21dbeb27 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 2 Oct 2020 14:24:01 +0000 Subject: [PATCH 379/544] [gn build] Port 0c1bb4f8851 --- llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn index 29977c0f3f636..f8e7c4ff25428 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn @@ -2,6 +2,7 @@ static_library("TableGen") { output_name = "LLVMTableGen" deps = [ "//llvm/lib/Support" ] sources = [ + "DetailedRecordsBackend.cpp", "Error.cpp", "JSONBackend.cpp", "Main.cpp", From 2a9ce60de98e53198047daaeeec3cf09ece4e693 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Fri, 2 Oct 2020 16:13:09 +0200 Subject: [PATCH 380/544] [compiler-rt] [netbsd] Improve the portability of ThreadSelfTlsTcb Use __lwp_gettcb_fast() and __lwp_getprivate_fast(), as _lwp_getprivate() can be a biased pointer and invalid for use in this function on all CPUs. --- .../lib/sanitizer_common/sanitizer_linux_libcdep.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index fe08ffc1bb4a6..f95f03b089a3a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -28,6 +28,10 @@ #include "sanitizer_placement_new.h" #include "sanitizer_procmaps.h" +#if SANITIZER_NETBSD +#define _RTLD_SOURCE // for __lwp_gettcb_fast() / __lwp_getprivate_fast() +#endif + #include // for dlsym() #include #include @@ -412,7 +416,13 @@ uptr ThreadSelf() { #if SANITIZER_NETBSD static struct tls_tcb * ThreadSelfTlsTcb() { - return (struct tls_tcb *)_lwp_getprivate(); + struct tls_tcb *tcb = nullptr; +#ifdef __HAVE___LWP_GETTCB_FAST + tcb = (struct tls_tcb *)__lwp_gettcb_fast(); +#elif defined(__HAVE___LWP_GETPRIVATE_FAST) + tcb = (struct tls_tcb *)__lwp_getprivate_fast(); +#endif + return tcb; } uptr ThreadSelf() { From c87c017a4c47c47b002b9f55f25285298cd07093 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 2 Oct 2020 10:32:52 -0400 Subject: [PATCH 381/544] Fix failure in test hip-macros.hip requires amdgpu-registered-target. --- clang/test/Driver/hip-macros.hip | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip index 00dcca17a08aa..3105c25b8b9d5 100644 --- a/clang/test/Driver/hip-macros.hip +++ b/clang/test/Driver/hip-macros.hip @@ -1,3 +1,4 @@ +// REQUIRES: clang-driver, amdgpu-registered-target // RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ // RUN: --cuda-device-only -nogpuinc -nogpulib \ // RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s From 7b19cd06d732e9c3db326dc9b9c82d66dc624fd2 Mon Sep 17 00:00:00 2001 From: Denis Antrushin Date: Thu, 1 Oct 2020 15:09:57 +0700 Subject: [PATCH 382/544] [Statepoints][ISEL] visitGCRelocate: chain to current DAG root. This is similar to D87251, but for CopyFromRegs nodes. Even for local statepoint uses we generate CopyToRegs/CopyFromRegs nodes. When generating CopyFromRegs in visitGCRelocate, we must chain to current DAG root, not EntryNode, to ensure proper ordering of copy w.r.t. statepoint node producing result for it. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D88639 --- .../SelectionDAG/StatepointLowering.cpp | 5 ++- .../CodeGen/X86/statepoint-vreg-details.ll | 32 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 701360bf38c67..9cb7f45db096a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1123,7 +1123,10 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Relocate.getType(), None); // This is not an ABI copy. - SDValue Chain = DAG.getEntryNode(); + // We generate copy to/from regs even for local uses, hence we must + // chain with current root to ensure proper ordering of copies w.r.t. + // statepoint. + SDValue Chain = DAG.getRoot(); SDValue Relocation = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, nullptr); setValue(&Relocate, Relocation); diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll index 05b0402440d24..eb260ab5aaf6e 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -18,6 +18,7 @@ declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*) declare i32* @fake_personality_function() declare i32 @foo(i32, i8 addrspace(1)*, i32, i32, i32) +declare void @bar(i8 addrspace(1)*, i8 addrspace(1)*) ; test most simple relocate define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" { @@ -378,6 +379,36 @@ exceptional_return: ; preds = %entry unreachable } +; Test that CopyFromReg emitted during ISEL processing of gc.relocate are properly ordered w.r.t. statepoint. +define i8 addrspace(1)* @test_isel_sched(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i32 %2) gc "statepoint-example" { +;CHECK-VREG-LABEL: name: test_isel_sched +;CHECK-VREG: bb.0.entry: +;CHECK-VREG: %2:gr32 = COPY $edx +;CHECK-VREG: %1:gr64 = COPY $rsi +;CHECK-VREG: %0:gr64 = COPY $rdi +;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags +;CHECK-VREG: %5:gr64 = CMOV64rr %1, %0, 4, implicit $eflags +;CHECK-VREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.1) +;CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %1 :: (store 8 into %stack.0) +;CHECK-VREG: %6:gr32 = MOV32r0 implicit-def dead $eflags +;CHECK-VREG: %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit +;CHECK-VREG: $rdi = COPY %7 +;CHECK-VREG: $rsi = COPY %5 +;CHECK-VREG: %3:gr64, %4:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, %1(tied-def 0), 1, 8, %stack.1, 0, %0(tied-def 1), csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1) +;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags +;CHECK-VREG: %8:gr64 = CMOV64rr %3, %4, 4, implicit $eflags +;CHECK-VREG: $rax = COPY %8 +;CHECK-VREG: RET 0, $rax +entry: + %cmp = icmp eq i32 %2, 0 + %ptr = select i1 %cmp, i8 addrspace(1)* %0, i8 addrspace(1)* %1 + %token = call token (i64, i32, void (i8 addrspace(1)*, i8 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8p1i8f(i64 10, i32 0, void (i8 addrspace(1)*, i8 addrspace(1)*)* @bar, i32 2, i32 0, i8 addrspace(1)* null, i8 addrspace(1)* %ptr, i32 0, i32 0) [ "deopt"(), "gc-live"(i8 addrspace(1)* %0, i8 addrspace(1)* %1) ] + %rel0 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 0, i32 0) + %rel1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 1, i32 1) + %res = select i1 %cmp, i8 addrspace(1)* %rel0, i8 addrspace(1)* %rel1 + ret i8 addrspace(1)* %res +} + declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) @@ -387,4 +418,5 @@ declare i1 @llvm.experimental.gc.result.i1(token) declare void @__llvm_deoptimize(i32) declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 immarg, i32 immarg, void (i32)*, i32 immarg, i32 immarg, ...) declare token @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32i32i32f(i64 immarg, i32 immarg, i32 (i32, i8 addrspace(1)*, i32, i32, i32)*, i32 immarg, i32 immarg, ...) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8p1i8f(i64 immarg, i32 immarg, void (i8 addrspace(1)*, i8 addrspace(1)*)*, i32 immarg, i32 immarg, ...) From f2c6bfa350de142e4d63808d03335f69bd136d6a Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 23 Jul 2020 16:22:48 +0200 Subject: [PATCH 383/544] Fix interaction between stack alignment and inline-asm stack clash protection As reported in https://github.com/rust-lang/rust/issues/70143 alignment is not taken into account when doing the probing. Fix that by adjusting the first probe if the stack align is small, or by extending the dynamic probing if the alignment is large. Differential Revision: https://reviews.llvm.org/D84419 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 222 ++++++++++++++++-- llvm/lib/Target/X86/X86FrameLowering.h | 8 +- .../X86/stack-clash-large-large-align.ll | 88 +++++++ .../CodeGen/X86/stack-clash-no-free-probe.ll | 27 --- .../stack-clash-small-alloc-medium-align.ll | 135 +++++++++++ .../X86/stack-clash-small-large-align.ll | 83 +++++++ 6 files changed, 512 insertions(+), 51 deletions(-) create mode 100644 llvm/test/CodeGen/X86/stack-clash-large-large-align.ll delete mode 100644 llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-large-align.ll diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 90265ddf344a1..6d196a6228373 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric( const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); uint64_t ProbeChunk = StackProbeSize * 8; + uint64_t MaxAlign = + TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0; + // Synthesize a loop or unroll it, depending on the number of iterations. + // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left + // between the unaligned rsp and current rsp. if (Offset > ProbeChunk) { - emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset); + emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, + MaxAlign % StackProbeSize); } else { - emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset); + emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, + MaxAlign % StackProbeSize); } } void X86FrameLowering::emitStackProbeInlineGenericBlock( MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - uint64_t Offset) const { + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, + uint64_t AlignOffset) const { const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); + uint64_t CurrentOffset = 0; - // 0 Thanks to return address being saved on the stack - uint64_t CurrentProbeOffset = 0; - // For the first N - 1 pages, just probe. I tried to take advantage of + assert(AlignOffset < StackProbeSize); + + // If the offset is so small it fits within a page, there's nothing to do. + if (StackProbeSize < Offset + AlignOffset) { + + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addImm(StackProbeSize - AlignOffset) + .setMIFlag(MachineInstr::FrameSetup); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) + .setMIFlag(MachineInstr::FrameSetup), + StackPtr, false, 0) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + NumFrameExtraProbe++; + CurrentOffset = StackProbeSize - AlignOffset; + } + + // For the next N - 1 pages, just probe. I tried to take advantage of // natural probes but it implies much more logic and there was very few // interesting natural probes to interleave. while (CurrentOffset + StackProbeSize < Offset) { @@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( .setMIFlag(MachineInstr::FrameSetup); NumFrameExtraProbe++; CurrentOffset += StackProbeSize; - CurrentProbeOffset += StackProbeSize; } + // No need to probe the tail, it is smaller than a Page. uint64_t ChunkSize = Offset - CurrentOffset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) @@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( void X86FrameLowering::emitStackProbeInlineGenericLoop( MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - uint64_t Offset) const { + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, + uint64_t AlignOffset) const { assert(Offset && "null offset"); const X86Subtarget &STI = MF.getSubtarget(); @@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); + if (AlignOffset) { + if (AlignOffset < StackProbeSize) { + // Perform a first smaller allocation followed by a probe. + const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr) + .addReg(StackPtr) + .addImm(AlignOffset) + .setMIFlag(MachineInstr::FrameSetup); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) + .setMIFlag(MachineInstr::FrameSetup), + StackPtr, false, 0) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + NumFrameExtraProbe++; + Offset -= AlignOffset; + } + } + // Synthesize a loop NumFrameLoopProbe++; const BasicBlock *LLVM_BB = MBB.getBasicBlock(); @@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( // save loop bound { - const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); - BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed) + const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset); + BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) .addReg(FinalStackProbed) .addImm(Offset / StackProbeSize * StackProbeSize) .setMIFlag(MachineInstr::FrameSetup); @@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( // allocate a page { - const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); - BuildMI(testMBB, DL, TII.get(Opc), StackPtr) + const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); + BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr) .addReg(StackPtr) .addImm(StackProbeSize) .setMIFlag(MachineInstr::FrameSetup); @@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, uint64_t MaxAlign) const { uint64_t Val = -MaxAlign; unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); - MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) - .addReg(Reg) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); + MachineFunction &MF = *MBB.getParent(); + const X86Subtarget &STI = MF.getSubtarget(); + const X86TargetLowering &TLI = *STI.getTargetLowering(); + const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); + const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); + + // We want to make sure that (in worst case) less than StackProbeSize bytes + // are not probed after the AND. This assumption is used in + // emitStackProbeInlineGeneric. + if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { + { + NumFrameLoopProbe++; + MachineBasicBlock *entryMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MachineBasicBlock *headMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MachineBasicBlock *bodyMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MachineBasicBlock *footMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + + MachineFunction::iterator MBBIter = MBB.getIterator(); + MF.insert(MBBIter, entryMBB); + MF.insert(MBBIter, headMBB); + MF.insert(MBBIter, bodyMBB); + MF.insert(MBBIter, footMBB); + const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; + Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + + // Setup entry block + { + + entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI); + BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + MachineInstr *MI = + BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed) + .addReg(FinalStackProbed) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + + BuildMI(entryMBB, DL, + TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) + .addReg(FinalStackProbed) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(entryMBB, DL, TII.get(X86::JCC_1)) + .addMBB(&MBB) + .addImm(X86::COND_E) + .setMIFlag(MachineInstr::FrameSetup); + entryMBB->addSuccessor(headMBB); + entryMBB->addSuccessor(&MBB); + } + + // Loop entry block + + { + const unsigned SUBOpc = + getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); + BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr) + .addReg(StackPtr) + .addImm(StackProbeSize) + .setMIFlag(MachineInstr::FrameSetup); + + BuildMI(headMBB, DL, + TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) + .addReg(FinalStackProbed) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + + // jump + BuildMI(headMBB, DL, TII.get(X86::JCC_1)) + .addMBB(footMBB) + .addImm(X86::COND_B) + .setMIFlag(MachineInstr::FrameSetup); + + headMBB->addSuccessor(bodyMBB); + headMBB->addSuccessor(footMBB); + } + + // setup loop body + { + addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc)) + .setMIFlag(MachineInstr::FrameSetup), + StackPtr, false, 0) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + + const unsigned SUBOpc = + getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); + BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr) + .addReg(StackPtr) + .addImm(StackProbeSize) + .setMIFlag(MachineInstr::FrameSetup); + + // cmp with stack pointer bound + BuildMI(bodyMBB, DL, + TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) + .addReg(FinalStackProbed) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + + // jump + BuildMI(bodyMBB, DL, TII.get(X86::JCC_1)) + .addMBB(bodyMBB) + .addImm(X86::COND_B) + .setMIFlag(MachineInstr::FrameSetup); + bodyMBB->addSuccessor(bodyMBB); + bodyMBB->addSuccessor(footMBB); + } + + // setup loop footer + { + BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr) + .addReg(FinalStackProbed) + .setMIFlag(MachineInstr::FrameSetup); + addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc)) + .setMIFlag(MachineInstr::FrameSetup), + StackPtr, false, 0) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + footMBB->addSuccessor(&MBB); + } + + recomputeLiveIns(*headMBB); + recomputeLiveIns(*bodyMBB); + recomputeLiveIns(*footMBB); + recomputeLiveIns(MBB); + } + } else { + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) + .addReg(Reg) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } } bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index c0b4be95f88d3..bb2e83205e717 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -213,14 +213,14 @@ class X86FrameLowering : public TargetFrameLowering { void emitStackProbeInlineGenericBlock(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, - uint64_t Offset) const; + const DebugLoc &DL, uint64_t Offset, + uint64_t Align) const; void emitStackProbeInlineGenericLoop(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, - uint64_t Offset) const; + const DebugLoc &DL, uint64_t Offset, + uint64_t Align) const; /// Emit a stub to later inline the target stack probe. MachineInstr *emitStackProbeInlineStub(MachineFunction &MF, diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll new file mode 100644 index 0000000000000..6c981cb4ac910 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll @@ -0,0 +1,88 @@ +; RUN: llc < %s | FileCheck %s + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo_noprotect() local_unnamed_addr { +; CHECK-LABEL: foo_noprotect: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000 +; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000 +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl $1, 28792(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + + %a = alloca i32, i64 18000, align 4096 + %b0 = getelementptr inbounds i32, i32* %a, i64 98 + %b1 = getelementptr inbounds i32, i32* %a, i64 7198 + store volatile i32 1, i32* %b0 + store volatile i32 1, i32* %b1 + %c = load volatile i32, i32* %a + ret i32 %c +} + +define i32 @foo_protect() local_unnamed_addr #0 { +; CHECK-LABEL: foo_protect: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT:# %bb.1: +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_3 +; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_2 +; CHECK-NEXT:.LBB1_3: +; CHECK-NEXT: movq %r11, %rsp +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT:.LBB1_4: +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000 +; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: cmpq %r11, %rsp +; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT:# %bb.6: +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl $1, 28792(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + + + + %a = alloca i32, i64 18000, align 4096 + %b0 = getelementptr inbounds i32, i32* %a, i64 98 + %b1 = getelementptr inbounds i32, i32* %a, i64 7198 + store volatile i32 1, i32* %b0 + store volatile i32 1, i32* %b1 + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} diff --git a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll b/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll deleted file mode 100644 index 652acbdf00ba6..0000000000000 --- a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llc < %s | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @foo(i64 %i) local_unnamed_addr #0 { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 -; CHECK-NEXT: .cfi_def_cfa_offset 7888 -; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4) -; CHECK-NEXT: movl -128(%rsp), %eax -; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - - %a = alloca i32, i32 2000, align 16 - %b = getelementptr inbounds i32, i32* %a, i64 %i - store volatile i32 1, i32* %b - %c = load volatile i32, i32* %a - ret i32 %c -} - -attributes #0 = {"probe-stack"="inline-asm"} - diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll new file mode 100644 index 0000000000000..eafa86f1eba90 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll @@ -0,0 +1,135 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; | case1 | alloca + align < probe_size +define i32 @foo1(i64 %i) local_unnamed_addr #0 { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-64, %rsp +; CHECK-NEXT: subq $832, %rsp # imm = 0x340 +; CHECK-NEXT: movl $1, (%rsp,%rdi,4) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + %a = alloca i32, i32 200, align 64 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +; | case2 | alloca > probe_size, align > probe_size +define i32 @foo2(i64 %i) local_unnamed_addr #0 { +; CHECK-LABEL: foo2: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800 +; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $2048, %rsp # imm = 0x800 +; CHECK-NEXT: movl $1, (%rsp,%rdi,4) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + %a = alloca i32, i32 2000, align 2048 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size +define i32 @foo3(i64 %i) local_unnamed_addr #0 { +; CHECK-LABEL: foo3: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 +; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $1024, %rsp # imm = 0x400 +; CHECK-NEXT: movl $1, (%rsp,%rdi,4) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + + %a = alloca i32, i32 1000, align 1024 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +; | case4 | alloca + probe_size < probe_size, followed by dynamic alloca +define i32 @foo4(i64 %i) local_unnamed_addr #0 { +; CHECK-LABEL: foo4: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-64, %rsp +; CHECK-NEXT: subq $896, %rsp # imm = 0x380 +; CHECK-NEXT: movq %rsp, %rbx +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: movl $1, (%rbx,%rdi,4) +; CHECK-NEXT: movl (%rbx), %ecx +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: leaq 15(,%rcx,4), %rcx +; CHECK-NEXT: andq $-16, %rcx +; CHECK-NEXT: subq %rcx, %rax +; CHECK-NEXT: cmpq %rsp, %rax +; CHECK-NEXT: jle .LBB3_3 +; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %rax +; CHECK-NEXT: jg .LBB3_2 +; CHECK-NEXT:.LBB3_3: +; CHECK-NEXT: andq $-64, %rax +; CHECK-NEXT: movq %rax, %rsp +; CHECK-NEXT: movl (%rax), %eax +; CHECK-NEXT: leaq -8(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + %a = alloca i32, i32 200, align 64 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + %d = alloca i32, i32 %c, align 64 + %e = load volatile i32, i32* %d + ret i32 %e +} + +attributes #0 = {"probe-stack"="inline-asm"} + diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll new file mode 100644 index 0000000000000..e608bab904156 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s | FileCheck %s + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo_noprotect() local_unnamed_addr { +; CHECK-LABEL: foo_noprotect: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-65536, %rsp +; CHECK-NEXT: subq $65536, %rsp +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + + + %a = alloca i32, i64 100, align 65536 + %b = getelementptr inbounds i32, i32* %a, i64 98 + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +define i32 @foo_protect() local_unnamed_addr #0 { +; CHECK-LABEL: foo_protect: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT:# %bb.1: +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_3 +; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: cmpq %rsp, %r11 +; CHECK-NEXT: jb .LBB1_2 +; CHECK-NEXT:.LBB1_3: +; CHECK-NEXT: movq %r11, %rsp +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT:.LBB1_4: +; CHECK-NEXT: movq %rsp, %r11 +; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000 +; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: cmpq %r11, %rsp +; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT:# %bb.6: +; CHECK-NEXT: movl $1, 392(%rsp) +; CHECK-NEXT: movl (%rsp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + + + + + %a = alloca i32, i64 100, align 65536 + %b = getelementptr inbounds i32, i32* %a, i64 98 + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} From 86b14d0969ebdf51674df6d41c5e88a8d34879e8 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 2 Oct 2020 10:02:53 -0400 Subject: [PATCH 384/544] [mlir] Attempt to appease gcc-5 const char* -> StringLiteral conversion issu --- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 7f4e2ffa5262f..37d8d73e3dc9b 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2644,7 +2644,7 @@ static void printOpWithOffsetsSizesAndStrides( OpAsmPrinter &p, OpType op, llvm::function_ref printExtraOperands = [](OpAsmPrinter &p, OpType op) {}, - StringLiteral resultTypeKeyword = "to") { + StringRef resultTypeKeyword = "to") { int stdDotLen = StandardOpsDialect::getDialectNamespace().size() + 1; p << op.getOperation()->getName().getStringRef().drop_front(stdDotLen) << ' '; p << op.source(); @@ -2677,7 +2677,7 @@ static ParseResult parseOpWithOffsetsSizesAndStrides( std::function parseExtraOperand = nullptr, - StringLiteral resultTypeKeyword = "to") { + StringRef resultTypeKeyword = "to") { OpAsmParser::OperandType srcInfo, dstInfo; SmallVector offsetsInfo, sizesInfo, stridesInfo; auto indexType = parser.getBuilder().getIndexType(); From a611f9a5c689995f21204139eb2667b8ce4c9dd2 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Fri, 2 Oct 2020 08:42:13 -0700 Subject: [PATCH 385/544] [mlir] Fix call op conversion in bare-ptr calling convention We hit an llvm_unreachable related to unranked memrefs for call ops with scalar types. Removing the llvm_unreachable since the conversion should gracefully bail out in the presence of unranked memrefs. Adding tests to verify that. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D88709 --- .../StandardToLLVM/StandardToLLVM.cpp | 8 ++--- .../convert-static-memref-ops.mlir | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index c77c0b529cafd..37d0c940aa267 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -436,14 +436,10 @@ void LLVMTypeConverter::promoteBarePtrsToDescriptors( SmallVectorImpl &values) { assert(stdTypes.size() == values.size() && "The number of types and values doesn't match"); - for (unsigned i = 0, end = values.size(); i < end; ++i) { - Type stdTy = stdTypes[i]; - if (auto memrefTy = stdTy.dyn_cast()) + for (unsigned i = 0, end = values.size(); i < end; ++i) + if (auto memrefTy = stdTypes[i].dyn_cast()) values[i] = MemRefDescriptor::fromStaticShape(rewriter, loc, *this, memrefTy, values[i]); - else - llvm_unreachable("Unranked memrefs are not supported"); - } } ConvertToLLVMPattern::ConvertToLLVMPattern(StringRef rootOpName, diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir index 5dd36ba6d2acc..b93446f00d2ee 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir @@ -416,3 +416,33 @@ func @check_memref_func_call(%in : memref<10xi8>) -> memref<20xi8> { // BAREPTR-NEXT: llvm.return %[[res]] : !llvm.ptr return %res : memref<20xi8> } + +// ----- + +// BAREPTR: llvm.func @goo(!llvm.float) -> !llvm.float +func @goo(f32) -> f32 + +// BAREPTR-LABEL: func @check_scalar_func_call +// BAREPTR-SAME: %[[in:.*]]: !llvm.float) +func @check_scalar_func_call(%in : f32) { + // BAREPTR-NEXT: %[[call:.*]] = llvm.call @goo(%[[in]]) : (!llvm.float) -> !llvm.float + %res = call @goo(%in) : (f32) -> (f32) + return +} + +// ----- + +// Unranked memrefs are currently not supported in the bare-ptr calling +// convention. Check that the conversion to the LLVM-IR dialect doesn't happen +// in the presence of unranked memrefs when using such a calling convention. + +// BAREPTR: func @hoo(memref<*xi8>) -> memref<*xi8> +func @hoo(memref<*xi8>) -> memref<*xi8> + +// BAREPTR-LABEL: func @check_unranked_memref_func_call(%{{.*}}: memref<*xi8>) -> memref<*xi8> +func @check_unranked_memref_func_call(%in: memref<*xi8>) -> memref<*xi8> { + // BAREPTR-NEXT: call @hoo(%{{.*}}) : (memref<*xi8>) -> memref<*xi8> + %res = call @hoo(%in) : (memref<*xi8>) -> memref<*xi8> + // BAREPTR-NEXT: return %{{.*}} : memref<*xi8> + return %res : memref<*xi8> +} From db2a646c5f002cc16d02d6fac0b2d715cdd4a809 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 30 Sep 2020 14:57:47 +0200 Subject: [PATCH 386/544] [clangd] Add bencmark for measuring latency of DecisionForest model. Differential Revision: https://reviews.llvm.org/D88590 --- .../clangd/benchmarks/CMakeLists.txt | 2 + .../benchmarks/CompletionModel/CMakeLists.txt | 9 ++ .../DecisionForestBenchmark.cpp | 85 +++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt create mode 100644 clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp diff --git a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt index 1f3d88b42bce0..b62ffd7a1ad16 100644 --- a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt +++ b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt @@ -1,5 +1,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) +add_subdirectory(CompletionModel) + add_benchmark(IndexBenchmark IndexBenchmark.cpp) target_link_libraries(IndexBenchmark diff --git a/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt b/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt new file mode 100644 index 0000000000000..3998aa1225338 --- /dev/null +++ b/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) + +add_benchmark(DecisionForestBenchmark DecisionForestBenchmark.cpp) + +target_link_libraries(DecisionForestBenchmark + PRIVATE + clangDaemon + LLVMSupport + ) diff --git a/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp b/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp new file mode 100644 index 0000000000000..69ce65e08b772 --- /dev/null +++ b/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp @@ -0,0 +1,85 @@ +//===--- DecisionForestBenchmark.cpp ------------*- C++ -*-===// +// +// Benchmark for code completion ranking latency. +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Usage: +// ninja DecisionForestBenchmark && \ +// tools/clang/tools/extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark +//===----------------------------------------------------------------------===// + +#include "CompletionModel.h" +#include "benchmark/benchmark.h" +#include "llvm/ADT/StringRef.h" + +#include + +namespace clang { +namespace clangd { +namespace { +std::vector generateRandomDataset(int NumExamples) { + auto FlipCoin = [&](float Probability) { + return rand() % 1000 <= Probability * 1000; + }; + auto RandInt = [&](int Max) { return rand() % Max; }; + auto RandFloat = [&](float Max = 1.0) { + return rand() % 1000 / 1000.0 * Max; + }; + + std::vector Examples; + Examples.reserve(NumExamples); + for (int I = 0; I < NumExamples; ++I) { + Example E; + E.setIsDeprecated(FlipCoin(0.1)); // Boolean. + E.setIsReservedName(FlipCoin(0.1)); // Boolean. + E.setIsImplementationDetail(FlipCoin(0.3)); // Boolean. + E.setNumReferences(RandInt(10000)); // Can be large integer. + E.setSymbolCategory(RandInt(10)); // 10 Symbol Category. + + E.setIsNameInContext(FlipCoin(0.5)); // Boolean. + E.setIsForbidden(FlipCoin(0.1)); // Boolean. + E.setIsInBaseClass(FlipCoin(0.3)); // Boolean. + E.setFileProximityDistance( + FlipCoin(0.1) ? 999999 // Sometimes file distance is not available. + : RandInt(20)); + E.setSemaFileProximityScore(RandFloat(1)); // Float in range [0,1]. + E.setSymbolScopeDistance( + FlipCoin(0.1) ? 999999 // Sometimes scope distance is not available. + : RandInt(20)); + E.setSemaSaysInScope(FlipCoin(0.5)); // Boolean. + E.setScope(RandInt(4)); // 4 Scopes. + E.setContextKind(RandInt(32)); // 32 Context kinds. + E.setIsInstanceMember(FlipCoin(0.5)); // Boolean. + E.setHadContextType(FlipCoin(0.6)); // Boolean. + E.setHadSymbolType(FlipCoin(0.6)); // Boolean. + E.setTypeMatchesPreferred(FlipCoin(0.5)); // Boolean. + E.setFilterLength(RandInt(15)); + Examples.push_back(E); + } + return Examples; +} + +void runDecisionForestPrediciton(const std::vector Examples) { + for (const Example &E : Examples) + Evaluate(E); +} + +static void decisionForestPredict(benchmark::State &State) { + srand(0); + for (auto _ : State) { + State.PauseTiming(); + const std::vector Examples = generateRandomDataset(1000000); + State.ResumeTiming(); + runDecisionForestPrediciton(Examples); + } +} +BENCHMARK(decisionForestPredict); + +} // namespace +} // namespace clangd +} // namespace clang + +BENCHMARK_MAIN(); From f192594956281744f67c4535bf2cce922f155aff Mon Sep 17 00:00:00 2001 From: Vinay Madhusudan Date: Fri, 2 Oct 2020 17:11:02 +0100 Subject: [PATCH 387/544] [AArch64] Generate dot for v16i8 sum reduction to i32 Convert VECREDUCE_ADD( EXTEND(v16i8_type) ) to VECREDUCE_ADD( DOTv16i8(v16i8_type) ) whenever the result type is i32. This gains in one of the SPECCPU 2017 benchmark. This partially solves the bug: https://bugs.llvm.org/show_bug.cgi?id=46888 Meta ticket: https://bugs.llvm.org/show_bug.cgi?id=46929 Differential Revision: https://reviews.llvm.org/D88577 --- .../Target/AArch64/AArch64ISelLowering.cpp | 34 +++++++++++++++++++ llvm/test/CodeGen/AArch64/neon-dot-product.ll | 28 +++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d7d326fa019dc..f513dce73277b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -790,6 +790,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::VECREDUCE_ADD); setTargetDAGCombine(ISD::GlobalAddress); @@ -10989,6 +10990,37 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// VECREDUCE_ADD( EXTEND(v16i8_type) ) to +// VECREDUCE_ADD( DOTv16i8(v16i8_type) ) +static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *ST) { + SDValue Op0 = N->getOperand(0); + if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32) + return SDValue(); + + if (Op0.getValueType().getVectorElementType() != MVT::i32) + return SDValue(); + + unsigned ExtOpcode = Op0.getOpcode(); + if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND) + return SDValue(); + + EVT Op0VT = Op0.getOperand(0).getValueType(); + if (Op0VT != MVT::v16i8) + return SDValue(); + + SDLoc DL(Op0); + SDValue Ones = DAG.getConstant(1, DL, Op0VT); + SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32); + auto DotIntrisic = (ExtOpcode == ISD::ZERO_EXTEND) + ? Intrinsic::aarch64_neon_udot + : Intrinsic::aarch64_neon_sdot; + SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(), + DAG.getConstant(DotIntrisic, DL, MVT::i32), Zeros, + Ones, Op0.getOperand(0)); + return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot); +} + static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -14671,6 +14703,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performPostLD1Combine(N, DCI, true); case ISD::EXTRACT_VECTOR_ELT: return performExtractVectorEltCombine(N, DAG); + case ISD::VECREDUCE_ADD: + return performVecReduceAddCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll index b6131e1d045ed..eef89ab6ff391 100644 --- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll @@ -255,6 +255,20 @@ entry: ret i32 %op.extra } +define i32 @test_udot_v16i8_2(i8* nocapture readonly %a1) { +; CHECK-LABEL: test_udot_v16i8_2: +; CHECK: movi {{v[0-9]+}}.16b, #1 +; CHECK: movi {{v[0-9]+}}.2d, #0000000000000000 +; CHECK: udot {{v[0-9]+}}.4s, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: addv s0, {{v[0-9]+}}.4s +entry: + %0 = bitcast i8* %a1 to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0 + %2 = zext <16 x i8> %1 to <16 x i32> + %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2) + ret i32 %3 +} + define i32 @test_sdot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { entry: ; CHECK-LABEL: test_sdot_v16i8: @@ -270,3 +284,17 @@ entry: %op.extra = add nsw i32 %7, %sum ret i32 %op.extra } + +define i32 @test_sdot_v16i8_2(i8* nocapture readonly %a1) { +; CHECK-LABEL: test_sdot_v16i8_2: +; CHECK: movi {{v[0-9]+}}.16b, #1 +; CHECK: movi {{v[0-9]+}}.2d, #0000000000000000 +; CHECK: sdot {{v[0-9]+}}.4s, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: addv s0, {{v[0-9]+}}.4s +entry: + %0 = bitcast i8* %a1 to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0 + %2 = sext <16 x i8> %1 to <16 x i32> + %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2) + ret i32 %3 +} From 5e8e89d814817fac19c5a93e4ed7910e97401a3f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 15:29:21 +0100 Subject: [PATCH 388/544] TruncInstCombine.cpp - use auto * to fix llvm-qualified-auto clang-tidy warning. NFCI. --- llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp index 501e01138e9f1..de61938359386 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp @@ -345,7 +345,7 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) { // 1. Update Old-TruncInst -> New-TruncInst. // 2. Remove Old-TruncInst (if New node is not TruncInst). // 3. Add New-TruncInst (if Old node was not TruncInst). - auto Entry = find(Worklist, I); + auto *Entry = find(Worklist, I); if (Entry != Worklist.end()) { if (auto *NewCI = dyn_cast(Res)) *Entry = NewCI; From 0347f3ea720b16305d77449f9868255496e0b27c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 15:30:48 +0100 Subject: [PATCH 389/544] TruncInstCombine.cpp - fix header include ordering to fix llvm-include-order clang-tidy warning. NFCI. --- llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp index de61938359386..e9418175c8429 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp @@ -31,8 +31,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" using namespace llvm; From 3d14a1e982ad27111346471564d575ad5efc6419 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 17:15:32 +0100 Subject: [PATCH 390/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - support for 'partial' bswap patterns (PR47191) If we're bswap'ing some bytes and zero'ing the remainder we can perform this as a bswap+mask which helps us match 'partial' bswaps as a first step towards folding into a more complex bswap pattern. Differential Revision: https://reviews.llvm.org/D88578 --- .../InstCombine/InstCombineAndOrXor.cpp | 35 ++--- llvm/lib/Transforms/Utils/Local.cpp | 39 +++++- llvm/test/Transforms/InstCombine/bswap.ll | 123 +++--------------- 3 files changed, 62 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index cbc3f5a2532f7..edb2dc8881c7b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2046,29 +2046,18 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) { Op1 = Ext->getOperand(0); // (A | B) | C and A | (B | C) -> bswap if possible. - bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())); - - // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. - bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value())); - - // (A & B) | (C & D) -> bswap if possible. - bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value())); - - // (A << B) | (C & D) -> bswap if possible. - // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a - // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935, - // C2 = 8 for i32). - // This pattern can occur when the operands of the 'or' are not canonicalized - // for some reason (not having only one use, for example). - bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value()))) || - (match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value()))); - - if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh) + bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); + + // (A >> B) | C and (A << B) | C -> bswap if possible. + bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || + match(Op1, m_LogicalShift(m_Value(), m_Value())); + + // (A & B) | C and A | (B & C) -> bswap if possible. + bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || + match(Op1, m_And(m_Value(), m_Value())); + + if (!OrWithOrs && !OrWithShifts && !OrWithAnds) return nullptr; SmallVector Insts; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0fd0dfa24ce96..0c27d803946e1 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2940,6 +2940,24 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } + // BSWAP - most likely due to us previous matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift // amount (modulo). // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -3032,10 +3050,15 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. unsigned DemandedBW = DemandedTy->getBitWidth(); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; bool OKForBitReverse = MatchBitReversals; for (unsigned BitIdx = 0; (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, DemandedBW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], @@ -3050,7 +3073,6 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( else return false; - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); Value *Provider = Res->Provider; // We may need to truncate the provider. @@ -3061,12 +3083,19 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( Provider = Trunc; } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } // We may need to zeroextend back to the result type. - if (ITy != CI->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + if (ITy != Result->getType()) { + auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 14b8ea7ecf2a0..af9350d1c4e0f 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -499,14 +499,8 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { define i32 @partial_bswap(i32 %x) { ; CHECK-LABEL: @partial_bswap( -; CHECK-NEXT: [[X3:%.*]] = shl i32 [[X:%.*]], 24 -; CHECK-NEXT: [[A2:%.*]] = shl i32 [[X]], 8 -; CHECK-NEXT: [[X2:%.*]] = and i32 [[A2]], 16711680 -; CHECK-NEXT: [[X32:%.*]] = or i32 [[X3]], [[X2]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[X]], -65536 -; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[T1]]) -; CHECK-NEXT: [[R:%.*]] = or i32 [[X32]], [[T2]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x3 = shl i32 %x, 24 %a2 = shl i32 %x, 8 @@ -543,10 +537,9 @@ declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -571,13 +564,9 @@ define i64 @bswap_and_mask_1(i64 %0) { define i64 @bswap_and_mask_2(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: ret i64 [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -700,28 +689,8 @@ define i32 @funnel_binary(i32 %abcd) { define i64 @PR47191_problem1(i64 %0) { ; CHECK-LABEL: @PR47191_problem1( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -749,28 +718,8 @@ define i64 @PR47191_problem1(i64 %0) { define i64 @PR47191_problem2(i64 %0) { ; CHECK-LABEL: @PR47191_problem2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -798,28 +747,8 @@ define i64 @PR47191_problem2(i64 %0) { define i64 @PR47191_problem3(i64 %0) { ; CHECK-LABEL: @PR47191_problem3( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -847,28 +776,8 @@ define i64 @PR47191_problem3(i64 %0) { define i64 @PR47191_problem4(i64 %0) { ; CHECK-LABEL: @PR47191_problem4( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 From 432e4e56d3d25c209b3336655aa374095e695956 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Fri, 2 Oct 2020 09:26:21 -0700 Subject: [PATCH 391/544] Revert "[WebAssembly] Emulate v128.const efficiently" This reverts commit 542523a61a21c13e7f244bcf821b0fdeb8c6bb24. --- .../WebAssembly/WebAssemblyISelLowering.cpp | 69 ++----------------- .../CodeGen/WebAssembly/simd-build-vector.ll | 69 ++----------------- 2 files changed, 8 insertions(+), 130 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 8474e50ea42f7..425f8b86c9fbc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -1566,7 +1565,6 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, }; } else if (NumConstantLanes >= NumSplatLanes && Subtarget->hasUnimplementedSIMD128()) { - // If we support v128.const, emit it directly SmallVector ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { @@ -1578,67 +1576,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } Result = DAG.getBuildVector(VecT, DL, ConstLanes); - IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { return IsConstant(Lane); }; - } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) { - // Otherwise, if this is an integer vector, pack the lane values together so - // we can construct the 128-bit constant from a pair of i64s using a splat - // followed by at most one i64x2.replace_lane. Also keep track of the lanes - // that actually matter so we can avoid the replace_lane in more cases. - std::array I64s({0, 0}); - std::array ConstLaneMasks({0, 0}); - uint8_t *I64Bytes = reinterpret_cast(I64s.data()); - uint8_t *MaskBytes = reinterpret_cast(ConstLaneMasks.data()); - unsigned I = 0; - size_t ByteStep = VecT.getScalarSizeInBits() / 8; - for (const SDValue &Lane : Op->op_values()) { - if (IsConstant(Lane)) { - using llvm::support::little; - using llvm::support::endian::byte_swap; - // The endianness of the compiler matters here. We want to enforce - // little endianness so that the bytes of a smaller integer type will - // occur first in the uint64_t. - auto *Const = cast(Lane.getNode()); - uint64_t Val = byte_swap(Const->getLimitedValue(), little); - uint8_t *ValPtr = reinterpret_cast(&Val); - std::copy(ValPtr, ValPtr + ByteStep, I64Bytes + I * ByteStep); - uint64_t Mask = uint64_t(-1LL); - uint8_t *MaskPtr = reinterpret_cast(&Mask); - std::copy(MaskPtr, MaskPtr + ByteStep, MaskBytes + I * ByteStep); - } - ++I; - } - // Check whether all constant lanes in the second half of the vector are - // equivalent in the first half or vice versa to determine whether splatting - // either side will be sufficient to materialize the constant. As a special - // case, if the first and second halves have no constant lanes in common, we - // can just combine them. - bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1]; - bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0]; - bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0; - - uint64_t Splatted; - if (SecondHalfSufficient) { - Splatted = I64s[1]; - } else if (CombinedSufficient) { - Splatted = I64s[0] | I64s[1]; - } else { - Splatted = I64s[0]; - } - - Result = DAG.getSplatBuildVector(MVT::v2i64, DL, - DAG.getConstant(Splatted, DL, MVT::i64)); - if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) { - Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result, - DAG.getConstant(I64s[1], DL, MVT::i64), - DAG.getConstant(1, DL, MVT::i32)); - } - Result = DAG.getBitcast(VecT, Result); - IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { - return IsConstant(Lane); - }; - } else { + } + if (!Result) { // Use a splat, but possibly a load_splat LoadSDNode *SplattedLoad; if ((SplattedLoad = dyn_cast(SplatValue)) && @@ -1651,14 +1593,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, } else { Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); } - IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { return Lane == SplatValue; }; } - assert(Result); - assert(IsLaneConstructed); - // Add replace_lane instructions for any unhandled values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index afd7375d146ae..43cfa97933f84 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -8,73 +8,12 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: emulated_const_trivial_splat: -; CHECK-NEXT: .functype emulated_const_trivial_splat () -> (v128) -; SIMD-VM-NEXT: i64.const $push0=, 8589934593 -; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 -; SIMD-VM-NEXT: return $pop1 -; UNIMP: v128.const -define <4 x i32> @emulated_const_trivial_splat() { - ret <4 x i32> -} - -; CHECK-LABEL: emulated_const_first_sufficient: -; CHECK-NEXT: .functype emulated_const_first_sufficient () -> (v128) -; SIMD-VM-NEXT: i64.const $push0=, 8589934593 -; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 -; SIMD-VM-NEXT: return $pop1 -; UNIMP: v128.const -define <4 x i32> @emulated_const_first_sufficient() { - ret <4 x i32> -} - -; CHECK-LABEL: emulated_const_second_sufficient: -; CHECK-NEXT: .functype emulated_const_second_sufficient () -> (v128) -; SIMD-VM-NEXT: i64.const $push0=, 8589934593 -; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 -; SIMD-VM-NEXT: return $pop1 -; UNIMP: v128.const -define <4 x i32> @emulated_const_second_sufficient() { - ret <4 x i32> -} - -; CHECK-LABEL: emulated_const_combined_sufficient: -; CHECK-NEXT: .functype emulated_const_combined_sufficient () -> (v128) -; SIMD-VM-NEXT: i64.const $push0=, 8589934593 -; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 -; SIMD-VM-NEXT: return $pop1 -; UNIMP: v128.const -define <4 x i32> @emulated_const_combined_sufficient() { - ret <4 x i32> -} - -; CHECK-LABEL: emulated_const_either_sufficient: -; CHECK-NEXT: .functype emulated_const_either_sufficient () -> (v128) -; SIMD-VM-NEXT: i64.const $push0=, 1 -; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 -; SIMD-VM-NEXT: return $pop1 -; UNIMP: v128.const -define <4 x i32> @emulated_const_either_sufficient() { - ret <4 x i32> -} - -; CHECK-LABEL: emulated_const_neither_sufficient: -; CHECK-NEXT: .functype emulated_const_neither_sufficient () -> (v128) -; SIMD-VM-NEXT: i64.const $push0=, 8589934593 -; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0 -; SIMD-VM-NEXT: i64.const $push2=, 17179869184 -; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2 -; SIMD-VM-NEXT: return $pop3 -define <4 x i32> @emulated_const_neither_sufficient() { - ret <4 x i32> -} - ; CHECK-LABEL: same_const_one_replaced_i16x8: ; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128) ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42 ; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0 ; UNIMP-NEXT: return $pop[[L1]] -; SIMD-VM: i64x2.splat +; SIMD-VM: i16x8.splat define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) { %v = insertelement <8 x i16> , @@ -88,7 +27,7 @@ define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) { ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8 ; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0 ; UNIMP-NEXT: return $pop[[L1]] -; SIMD-VM: i64x2.splat +; SIMD-VM: i16x8.splat define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) { %v = insertelement <8 x i16> , @@ -129,7 +68,7 @@ define <4 x float> @different_const_one_replaced_f32x4(float %x) { ; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128) ; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1 ; UNIMP-NEXT: return $pop[[L0]] -; SIMD-VM: i64x2.splat +; SIMD-VM: i32x4.splat define <4 x i32> @splat_common_const_i32x4() { ret <4 x i32> } @@ -267,7 +206,7 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla ; UNIMP: i8x16.replace_lane ; UNIMP: i8x16.replace_lane ; UNIMP: return -; SIMD-VM: i64x2.splat +; SIMD-VM: i8x16.splat define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) { ; swizzle 0 %m0 = extractelement <16 x i8> %mask, i32 0 From 33fa3dbce91c8e75af57beeb013d82f08cccb733 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 2 Oct 2020 12:24:02 -0400 Subject: [PATCH 392/544] [CostModel] move default handling after switch; NFC We will need to add intrinsics to the switch (such as the ones that are currently in the switch above this one) that deal with special cases and then break to the default handling. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 57 ++++++++++++------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ee755c7890a8c..2a1ec0103d09f 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1161,34 +1161,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { FastMathFlags FMF = ICA.getFlags(); switch (IID) { - default: { - // Assume that we need to scalarize this intrinsic. - SmallVector Types; - for (const Value *Op : Args) { - Type *OpTy = Op->getType(); - assert(VF == 1 || !OpTy->isVectorTy()); - Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF)); - } - - if (VF > 1 && !RetTy->isVoidTy()) - RetTy = FixedVectorType::get(RetTy, VF); - - // Compute the scalarization overhead based on Args for a vector - // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while - // CostModel will pass a vector RetTy and VF is 1. - unsigned ScalarizationCost = std::numeric_limits::max(); - if (RetVF > 1 || VF > 1) { - ScalarizationCost = 0; - if (!RetTy->isVoidTy()) - ScalarizationCost += - getScalarizationOverhead(cast(RetTy), true, false); - ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); - } - - IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, - ScalarizationCost, I); - return thisT()->getIntrinsicInstrCost(Attrs, CostKind); - } + default: + break; case Intrinsic::masked_scatter: { assert(VF == 1 && "Can't vectorize types here."); const Value *Mask = Args[3]; @@ -1262,6 +1236,33 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } } + + // Assume that we need to scalarize this intrinsic. + SmallVector Types; + for (const Value *Op : Args) { + Type *OpTy = Op->getType(); + assert(VF == 1 || !OpTy->isVectorTy()); + Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF)); + } + + if (VF > 1 && !RetTy->isVoidTy()) + RetTy = FixedVectorType::get(RetTy, VF); + + // Compute the scalarization overhead based on Args for a vector + // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while + // CostModel will pass a vector RetTy and VF is 1. + unsigned ScalarizationCost = std::numeric_limits::max(); + if (RetVF > 1 || VF > 1) { + ScalarizationCost = 0; + if (!RetTy->isVoidTy()) + ScalarizationCost += + getScalarizationOverhead(cast(RetTy), true, false); + ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); + } + + IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, + ScalarizationCost, I); + return thisT()->getIntrinsicInstrCost(Attrs, CostKind); } /// Get intrinsic cost based on argument types. From 34d12c15f7d8336c74bd4493e8d284dc169587b9 Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Thu, 30 Jul 2020 14:47:42 -0700 Subject: [PATCH 393/544] [MLIR] Better message for FuncOp type mismatch Previously the actual types were not shown, which makes the message difficult to grok in the context of long lowering chains. Also, it appears that there were no actual tests for this. Differential Revision: https://reviews.llvm.org/D88318 --- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 4 +++- mlir/test/IR/operand.mlir | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 37d8d73e3dc9b..09600963be0ec 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -758,7 +758,9 @@ static LogicalResult verify(CallOp op) { for (unsigned i = 0, e = fnType.getNumInputs(); i != e; ++i) if (op.getOperand(i).getType() != fnType.getInput(i)) - return op.emitOpError("operand type mismatch"); + return op.emitOpError("operand type mismatch: expected operand type ") + << fnType.getInput(i) << ", but provided " + << op.getOperand(i).getType() << " for operand number " << i; if (fnType.getNumResults() != op.getNumResults()) return op.emitOpError("incorrect number of results for callee"); diff --git a/mlir/test/IR/operand.mlir b/mlir/test/IR/operand.mlir index 3ca8832821c3b..7daa90dcc6946 100644 --- a/mlir/test/IR/operand.mlir +++ b/mlir/test/IR/operand.mlir @@ -33,3 +33,15 @@ func @error_in_second_variadic_operand(%arg0: tensor, %arg1: f32) { "test.mixed_normal_variadic_operand"(%arg0, %arg0, %arg0, %arg1, %arg0) : (tensor, tensor, tensor, f32, tensor) -> () return } + +// ----- + +func @testfunc(%arg0: i32) { + return +} +func @invalid_call_operandtype() { + %0 = constant 0.0 : f32 + // expected-error @+1 {{operand type mismatch: expected operand type 'i32', but provided 'f32' for operand number 0}} + call @testfunc(%0) : (f32) -> () + return +} From 2fc0d4a8e83807d57f8d586af82934f94dead5e3 Mon Sep 17 00:00:00 2001 From: zhanghb97 Date: Wed, 30 Sep 2020 14:11:46 +0800 Subject: [PATCH 394/544] [mlir] Add Float Attribute, Integer Attribute and Bool Attribute subclasses to python bindings. Based on PyAttribute and PyConcreteAttribute classes, this patch implements the bindings of Float Attribute, Integer Attribute and Bool Attribute subclasses. This patch also defines the `mlirFloatAttrDoubleGetChecked` C API which is bound with the `FloatAttr.get_typed` python method. Differential Revision: https://reviews.llvm.org/D88531 --- mlir/include/mlir-c/StandardAttributes.h | 5 + mlir/lib/Bindings/Python/IRModules.cpp | 103 +++++++++++++++++++++ mlir/lib/CAPI/IR/StandardAttributes.cpp | 5 + mlir/test/Bindings/Python/ir_attributes.py | 57 ++++++++++++ 4 files changed, 170 insertions(+) diff --git a/mlir/include/mlir-c/StandardAttributes.h b/mlir/include/mlir-c/StandardAttributes.h index e5d5aeab43430..2fc2ecc9ee1d4 100644 --- a/mlir/include/mlir-c/StandardAttributes.h +++ b/mlir/include/mlir-c/StandardAttributes.h @@ -93,6 +93,11 @@ int mlirAttributeIsAFloat(MlirAttribute attr); MlirAttribute mlirFloatAttrDoubleGet(MlirContext ctx, MlirType type, double value); +/** Same as "mlirFloatAttrDoubleGet", but if the type is not valid for a + * construction of a FloatAttr, returns a null MlirAttribute. */ +MlirAttribute mlirFloatAttrDoubleGetChecked(MlirType type, double value, + MlirLocation loc); + /** Returns the value stored in the given floating point attribute, interpreting * the value as double. */ double mlirFloatAttrGetValueDouble(MlirAttribute attr); diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index 8d64b2d8de0a5..36e25eebfc71b 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -742,6 +742,106 @@ class PyConcreteAttribute : public BaseTy { static void bindDerived(ClassTy &m) {} }; +/// Float Point Attribute subclass - FloatAttr. +class PyFloatAttribute : public PyConcreteAttribute { +public: + static constexpr IsAFunctionTy isaFunction = mlirAttributeIsAFloat; + static constexpr const char *pyClassName = "FloatAttr"; + using PyConcreteAttribute::PyConcreteAttribute; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get", + // TODO: Make the location optional and create a default location. + [](PyType &type, double value, PyLocation &loc) { + MlirAttribute attr = + mlirFloatAttrDoubleGetChecked(type.type, value, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirAttributeIsNull(attr)) { + throw SetPyError(PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(type)).cast() + + "' and expected floating point type."); + } + return PyFloatAttribute(type.getContext(), attr); + }, + py::arg("type"), py::arg("value"), py::arg("loc"), + "Gets an uniqued float point attribute associated to a type"); + c.def_static( + "get_f32", + [](PyMlirContext &context, double value) { + MlirAttribute attr = mlirFloatAttrDoubleGet( + context.get(), mlirF32TypeGet(context.get()), value); + return PyFloatAttribute(context.getRef(), attr); + }, + py::arg("context"), py::arg("value"), + "Gets an uniqued float point attribute associated to a f32 type"); + c.def_static( + "get_f64", + [](PyMlirContext &context, double value) { + MlirAttribute attr = mlirFloatAttrDoubleGet( + context.get(), mlirF64TypeGet(context.get()), value); + return PyFloatAttribute(context.getRef(), attr); + }, + py::arg("context"), py::arg("value"), + "Gets an uniqued float point attribute associated to a f64 type"); + c.def_property_readonly( + "value", + [](PyFloatAttribute &self) { + return mlirFloatAttrGetValueDouble(self.attr); + }, + "Returns the value of the float point attribute"); + } +}; + +/// Integer Attribute subclass - IntegerAttr. +class PyIntegerAttribute : public PyConcreteAttribute { +public: + static constexpr IsAFunctionTy isaFunction = mlirAttributeIsAInteger; + static constexpr const char *pyClassName = "IntegerAttr"; + using PyConcreteAttribute::PyConcreteAttribute; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get", + [](PyType &type, int64_t value) { + MlirAttribute attr = mlirIntegerAttrGet(type.type, value); + return PyIntegerAttribute(type.getContext(), attr); + }, + py::arg("type"), py::arg("value"), + "Gets an uniqued integer attribute associated to a type"); + c.def_property_readonly( + "value", + [](PyIntegerAttribute &self) { + return mlirIntegerAttrGetValueInt(self.attr); + }, + "Returns the value of the integer attribute"); + } +}; + +/// Bool Attribute subclass - BoolAttr. +class PyBoolAttribute : public PyConcreteAttribute { +public: + static constexpr IsAFunctionTy isaFunction = mlirAttributeIsABool; + static constexpr const char *pyClassName = "BoolAttr"; + using PyConcreteAttribute::PyConcreteAttribute; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get", + [](PyMlirContext &context, bool value) { + MlirAttribute attr = mlirBoolAttrGet(context.get(), value); + return PyBoolAttribute(context.getRef(), attr); + }, + py::arg("context"), py::arg("value"), "Gets an uniqued bool attribute"); + c.def_property_readonly( + "value", + [](PyBoolAttribute &self) { return mlirBoolAttrGetValue(self.attr); }, + "Returns the value of the bool attribute"); + } +}; + class PyStringAttribute : public PyConcreteAttribute { public: static constexpr IsAFunctionTy isaFunction = mlirAttributeIsAString; @@ -1630,6 +1730,9 @@ void mlir::python::populateIRSubmodule(py::module &m) { "The underlying generic attribute of the NamedAttribute binding"); // Standard attribute bindings. + PyFloatAttribute::bind(m); + PyIntegerAttribute::bind(m); + PyBoolAttribute::bind(m); PyStringAttribute::bind(m); // Mapping of Type. diff --git a/mlir/lib/CAPI/IR/StandardAttributes.cpp b/mlir/lib/CAPI/IR/StandardAttributes.cpp index 77d5fcb8b33c2..1277d2b041ac2 100644 --- a/mlir/lib/CAPI/IR/StandardAttributes.cpp +++ b/mlir/lib/CAPI/IR/StandardAttributes.cpp @@ -102,6 +102,11 @@ MlirAttribute mlirFloatAttrDoubleGet(MlirContext ctx, MlirType type, return wrap(FloatAttr::get(unwrap(type), value)); } +MlirAttribute mlirFloatAttrDoubleGetChecked(MlirType type, double value, + MlirLocation loc) { + return wrap(FloatAttr::getChecked(unwrap(type), value, unwrap(loc))); +} + double mlirFloatAttrGetValueDouble(MlirAttribute attr) { return unwrap(attr).cast().getValueAsDouble(); } diff --git a/mlir/test/Bindings/Python/ir_attributes.py b/mlir/test/Bindings/Python/ir_attributes.py index a2fd50056bf00..dfdc81909a9a5 100644 --- a/mlir/test/Bindings/Python/ir_attributes.py +++ b/mlir/test/Bindings/Python/ir_attributes.py @@ -92,6 +92,63 @@ def testStandardAttrCasts(): run(testStandardAttrCasts) +# CHECK-LABEL: TEST: testFloatAttr +def testFloatAttr(): + ctx = mlir.ir.Context() + fattr = mlir.ir.FloatAttr(ctx.parse_attr("42.0 : f32")) + # CHECK: fattr value: 42.0 + print("fattr value:", fattr.value) + + # Test factory methods. + loc = ctx.get_unknown_location() + # CHECK: default_get: 4.200000e+01 : f32 + print("default_get:", mlir.ir.FloatAttr.get( + mlir.ir.F32Type(ctx), 42.0, loc)) + # CHECK: f32_get: 4.200000e+01 : f32 + print("f32_get:", mlir.ir.FloatAttr.get_f32(ctx, 42.0)) + # CHECK: f64_get: 4.200000e+01 : f64 + print("f64_get:", mlir.ir.FloatAttr.get_f64(ctx, 42.0)) + try: + fattr_invalid = mlir.ir.FloatAttr.get( + mlir.ir.IntegerType.get_signless(ctx, 32), 42, loc) + except ValueError as e: + # CHECK: invalid 'Type(i32)' and expected floating point type. + print(e) + else: + print("Exception not produced") + +run(testFloatAttr) + + +# CHECK-LABEL: TEST: testIntegerAttr +def testIntegerAttr(): + ctx = mlir.ir.Context() + iattr = mlir.ir.IntegerAttr(ctx.parse_attr("42")) + # CHECK: iattr value: 42 + print("iattr value:", iattr.value) + + # Test factory methods. + # CHECK: default_get: 42 : i32 + print("default_get:", mlir.ir.IntegerAttr.get( + mlir.ir.IntegerType.get_signless(ctx, 32), 42)) + +run(testIntegerAttr) + + +# CHECK-LABEL: TEST: testBoolAttr +def testBoolAttr(): + ctx = mlir.ir.Context() + battr = mlir.ir.BoolAttr(ctx.parse_attr("true")) + # CHECK: iattr value: 1 + print("iattr value:", battr.value) + + # Test factory methods. + # CHECK: default_get: true + print("default_get:", mlir.ir.BoolAttr.get(ctx, True)) + +run(testBoolAttr) + + # CHECK-LABEL: TEST: testStringAttr def testStringAttr(): ctx = mlir.ir.Context() From 64c54c5459cfae8478ce28710784f36b0d94fb2f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 1 Oct 2020 22:42:14 +0200 Subject: [PATCH 395/544] [MemCpyOpt] Regnerate test checks (NFC) --- .../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 18 +- .../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 15 +- .../2011-06-02-CallSlotOverwritten.ll | 20 +- .../MemCpyOpt/aggregate-type-crash.ll | 11 +- llvm/test/Transforms/MemCpyOpt/align.ll | 20 +- llvm/test/Transforms/MemCpyOpt/atomic.ll | 27 +- llvm/test/Transforms/MemCpyOpt/callslot_aa.ll | 8 +- .../Transforms/MemCpyOpt/callslot_deref.ll | 18 +- .../Transforms/MemCpyOpt/callslot_throw.ll | 29 +- .../Transforms/MemCpyOpt/capturing-func.ll | 13 +- llvm/test/Transforms/MemCpyOpt/crash.ll | 31 +- llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll | 125 ++--- llvm/test/Transforms/MemCpyOpt/form-memset.ll | 459 +++++++++++------- .../Transforms/MemCpyOpt/invariant.start.ll | 27 +- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 10 +- .../MemCpyOpt/load-store-to-memcpy.ll | 6 +- .../Transforms/MemCpyOpt/loadstore-sret.ll | 14 +- .../Transforms/MemCpyOpt/memcpy-to-memset.ll | 71 +-- .../test/Transforms/MemCpyOpt/memcpy-undef.ll | 26 +- llvm/test/Transforms/MemCpyOpt/memcpy.ll | 136 ++++-- llvm/test/Transforms/MemCpyOpt/memmove.ll | 27 +- .../MemCpyOpt/memset-memcpy-oversized.ll | 8 +- .../memset-memcpy-redundant-memset.ll | 183 ++++--- .../MemCpyOpt/memset-memcpy-to-2x-memset.ll | 84 ++-- llvm/test/Transforms/MemCpyOpt/nontemporal.ll | 41 +- llvm/test/Transforms/MemCpyOpt/pr29105.ll | 13 +- llvm/test/Transforms/MemCpyOpt/pr37967.ll | 18 +- .../Transforms/MemCpyOpt/process_store.ll | 27 +- .../Transforms/MemCpyOpt/profitable-memset.ll | 15 +- llvm/test/Transforms/MemCpyOpt/smaller.ll | 13 +- llvm/test/Transforms/MemCpyOpt/sret.ll | 21 +- .../test/Transforms/MemCpyOpt/stackrestore.ll | 42 +- .../store-to-memset-is-nonzero-type.ll | 2 +- .../Transforms/MemCpyOpt/store-to-memset.ll | 42 +- .../Transforms/MemCpyOpt/vscale-memset.ll | 4 +- 35 files changed, 1091 insertions(+), 533 deletions(-) diff --git a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll index 237b8fec4f645..0f8c417f21277 100644 --- a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s ; PR2077 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" @@ -7,6 +8,14 @@ target triple = "i386-pc-linux-gnu" %0 = type { x86_fp80, x86_fp80 } define internal fastcc void @initialize(%0* noalias nocapture sret %agg.result) nounwind { +; CHECK-LABEL: @initialize( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_RESULT_03:%.*]] = getelementptr [[TMP0:%.*]], %0* [[AGG_RESULT:%.*]], i32 0, i32 0 +; CHECK-NEXT: store x86_fp80 0xK00000000000000000000, x86_fp80* [[AGG_RESULT_03]], align 4 +; CHECK-NEXT: [[AGG_RESULT_15:%.*]] = getelementptr [[TMP0]], %0* [[AGG_RESULT]], i32 0, i32 1 +; CHECK-NEXT: store x86_fp80 0xK00000000000000000000, x86_fp80* [[AGG_RESULT_15]], align 4 +; CHECK-NEXT: ret void +; entry: %agg.result.03 = getelementptr %0, %0* %agg.result, i32 0, i32 0 store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 @@ -18,6 +27,13 @@ entry: declare fastcc x86_fp80 @passed_uninitialized(%0* nocapture) nounwind define fastcc void @badly_optimized() nounwind { +; CHECK-LABEL: @badly_optimized( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[Z:%.*]] = alloca [[TMP0:%.*]], align 8 +; CHECK-NEXT: call fastcc void @initialize(%0* noalias sret [[Z]]) +; CHECK-NEXT: [[TMP5:%.*]] = call fastcc x86_fp80 @passed_uninitialized(%0* [[Z]]) +; CHECK-NEXT: ret void +; entry: %z = alloca %0 %tmp = alloca %0 diff --git a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll index a0f34b9baa6df..dbe819adb689c 100644 --- a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -S | not grep "call.*memcpy." +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" %a = type { i32 } @@ -7,6 +8,18 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 declare void @g(%a* nocapture) define float @f() { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_VAR:%.*]] = alloca [[A:%.*]], align 8 +; CHECK-NEXT: [[B_VAR:%.*]] = alloca [[B:%.*]], align 8 +; CHECK-NEXT: [[B_VAR1:%.*]] = bitcast %b* [[B_VAR]] to %a* +; CHECK-NEXT: call void @g(%a* [[B_VAR1]]) +; CHECK-NEXT: [[A_I8:%.*]] = bitcast %a* [[A_VAR]] to i8* +; CHECK-NEXT: [[B_I8:%.*]] = bitcast %b* [[B_VAR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[B]], %b* [[B_VAR]], i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4 +; CHECK-NEXT: ret float [[TMP2]] +; entry: %a_var = alloca %a %b_var = alloca %b, align 1 diff --git a/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll b/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll index 8ba8df4d8b395..bd086967ec29b 100644 --- a/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll +++ b/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s ; PR10067 ; Make sure the call+copy isn't optimized in such a way that @@ -12,10 +13,25 @@ target triple = "i386-apple-darwin10" declare void @bar(%struct1* nocapture sret %agg.result) nounwind define i32 @foo() nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[X:%.*]] = alloca [[STRUCT1:%.*]], align 8 +; CHECK-NEXT: [[Y:%.*]] = alloca [[STRUCT2:%.*]], align 8 +; CHECK-NEXT: call void @bar(%struct1* sret [[X]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[GEPN1:%.*]] = getelementptr inbounds [[STRUCT2]], %struct2* [[Y]], i32 0, i32 0, i32 0 +; CHECK-NEXT: store i32 0, i32* [[GEPN1]], align 8 +; CHECK-NEXT: [[GEPN2:%.*]] = getelementptr inbounds [[STRUCT2]], %struct2* [[Y]], i32 0, i32 0, i32 1 +; CHECK-NEXT: store i32 0, i32* [[GEPN2]], align 4 +; CHECK-NEXT: [[BIT1:%.*]] = bitcast %struct1* [[X]] to i64* +; CHECK-NEXT: [[BIT2:%.*]] = bitcast %struct2* [[Y]] to i64* +; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64* [[BIT1]], align 8 +; CHECK-NEXT: store i64 [[LOAD]], i64* [[BIT2]], align 8 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [[STRUCT2]], %struct2* [[Y]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[GEP1]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; %x = alloca %struct1, align 8 %y = alloca %struct2, align 8 call void @bar(%struct1* sret %x) nounwind -; CHECK: call void @bar(%struct1* sret %x) %gepn1 = getelementptr inbounds %struct2, %struct2* %y, i32 0, i32 0, i32 0 store i32 0, i32* %gepn1, align 8 @@ -27,8 +43,6 @@ define i32 @foo() nounwind { %load = load i64, i64* %bit1, align 8 store i64 %load, i64* %bit2, align 8 -; CHECK: %load = load i64, i64* %bit1, align 8 -; CHECK: store i64 %load, i64* %bit2, align 8 %gep1 = getelementptr %struct2, %struct2* %y, i32 0, i32 0, i32 0 %ret = load i32, i32* %gep1 diff --git a/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll b/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll index 16d107730acd1..dd9536a858881 100644 --- a/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll +++ b/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S -o - < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -9,13 +10,19 @@ target triple = "x86_64-apple-macosx10.14.0" declare noalias i8* @my_malloc(%my_struct*) #0 define void @my_func(%my_struct* %0) { +; CHECK-LABEL: @my_func( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load [[MY_STRUCT:%.*]], %my_struct* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @my_malloc(%my_struct* [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %my_struct* +; CHECK-NEXT: store [[MY_STRUCT]] [[TMP1]], %my_struct* [[TMP3]], align 4 +; CHECK-NEXT: ret void +; entry: -; CHECK: entry: %1 = load %my_struct, %my_struct* %0 %2 = call i8* @my_malloc(%my_struct* %0) %3 = bitcast i8* %2 to %my_struct* store %my_struct %1, %my_struct* %3 -; CHECK-NOT: call void @llvm.memcpy.{{.*}}.{{.*}}.{{.*}} ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/align.ll b/llvm/test/Transforms/MemCpyOpt/align.ll index 2e683bfa91f5b..cdaf44f69e01c 100644 --- a/llvm/test/Transforms/MemCpyOpt/align.ll +++ b/llvm/test/Transforms/MemCpyOpt/align.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" @@ -9,7 +10,14 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind define void @foo(i32* %p) { ; CHECK-LABEL: @foo( -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 16, i1 false) +; CHECK-NEXT: [[A0:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 0 +; CHECK-NEXT: [[A1:%.*]] = getelementptr i32, i32* [[P]], i64 1 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[A3:%.*]] = getelementptr i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; %a0 = getelementptr i32, i32* %p, i64 0 store i32 0, i32* %a0, align 4 %a1 = getelementptr i32, i32* %p, i64 1 @@ -25,8 +33,14 @@ define void @foo(i32* %p) { define void @bar() { ; CHECK-LABEL: @bar( -; CHECK: %a4 = alloca i32, align 8 -; CHECK-NOT: memcpy +; CHECK-NEXT: [[A4:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[A8:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[A8_CAST:%.*]] = bitcast i32* [[A8]] to i8* +; CHECK-NEXT: [[A4_CAST:%.*]] = bitcast i32* [[A4]] to i8* +; CHECK-NEXT: [[A41:%.*]] = bitcast i32* [[A4]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A41]], i8 0, i64 4, i1 false) +; CHECK-NEXT: ret void +; %a4 = alloca i32, align 4 %a8 = alloca i32, align 8 %a8.cast = bitcast i32* %a8 to i8* diff --git a/llvm/test/Transforms/MemCpyOpt/atomic.ll b/llvm/test/Transforms/MemCpyOpt/atomic.ll index 65f6c925e205b..ed31766b2f547 100644 --- a/llvm/test/Transforms/MemCpyOpt/atomic.ll +++ b/llvm/test/Transforms/MemCpyOpt/atomic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -memcpyopt -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" @@ -11,8 +12,16 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; memcpyopt should not touch atomic ops define void @test1() nounwind uwtable ssp { -; CHECK: test1 -; CHECK: store atomic +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[X:%.*]] = alloca [101 x i32], align 16 +; CHECK-NEXT: [[BC:%.*]] = bitcast [101 x i32]* [[X]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[BC]], i8 0, i64 400, i1 false) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 100 +; CHECK-NEXT: store atomic i32 0, i32* [[GEP1]] unordered, align 4 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 0 +; CHECK-NEXT: call void @otherf(i32* [[GEP2]]) +; CHECK-NEXT: ret void +; %x = alloca [101 x i32], align 16 %bc = bitcast [101 x i32]* %x to i8* call void @llvm.memset.p0i8.i64(i8* align 16 %bc, i8 0, i64 400, i1 false) @@ -25,17 +34,21 @@ define void @test1() nounwind uwtable ssp { ; memcpyopt across unordered store define void @test2() nounwind uwtable ssp { -; CHECK: test2 -; CHECK: call -; CHECK-NEXT: store atomic -; CHECK-NEXT: call +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[OLD:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[NEW:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; CHECK-NEXT: store atomic i32 0, i32* @x unordered, align 4 +; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; CHECK-NEXT: ret void +; %old = alloca i32 %new = alloca i32 call void @otherf(i32* nocapture %old) store atomic i32 0, i32* @x unordered, align 4 %v = load i32, i32* %old store i32 %v, i32* %new - call void @otherf(i32* nocapture %new) + call void @otherf(i32* nocapture %new) ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll index 1d45cbe9e5cb7..6e7b78d4da715 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll @@ -1,12 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %T = type { i64, i64 } define void @test(i8* %src) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC:%.*]], i64 1, i1 false) +; CHECK-NEXT: ret void +; %tmp = alloca i8 %dst = alloca i8 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !noalias !2 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll index ad578be711cd0..a2c0503894a13 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,8 +8,13 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; all bytes of %dst that are touch by the memset are dereferenceable define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) { ; CHECK-LABEL: @must_remove_memcpy( -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; CHECK-NEXT: [[DST1:%.*]] = bitcast i8* [[DST:%.*]] to [4096 x i8]* +; CHECK-NEXT: [[DST12:%.*]] = bitcast [4096 x i8]* [[DST1]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST12]], i8 0, i64 4096, i1 false) +; CHECK-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false) @@ -20,8 +26,12 @@ define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst ; We can't remove the memcpy, but we can turn it into an independent memset. define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) { ; CHECK-LABEL: @must_not_remove_memcpy( -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P]], i8 0, i64 4096, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST:%.*]], i8 0, i64 4096, i1 false) +; CHECK-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll index 1aa4c92efc72c..7092f046af317 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll @@ -1,34 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s | FileCheck %s declare void @may_throw(i32* nocapture %x) -; CHECK-LABEL: define void @test1( define void @test1(i32* nocapture noalias dereferenceable(4) %x) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @may_throw(i32* nonnull [[T]]) +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; CHECK-NEXT: store i32 [[LOAD]], i32* [[X:%.*]], align 4 +; CHECK-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) %load = load i32, i32* %t, align 4 store i32 %load, i32* %x, align 4 -; CHECK: %[[t:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]]) -; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4 -; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4 ret void } declare void @always_throws() -; CHECK-LABEL: define void @test2( define void @test2(i32* nocapture noalias dereferenceable(4) %x) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @may_throw(i32* nonnull [[T]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; CHECK-NEXT: call void @always_throws() +; CHECK-NEXT: store i32 [[LOAD]], i32* [[X:%.*]], align 4 +; CHECK-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) nounwind %load = load i32, i32* %t, align 4 call void @always_throws() store i32 %load, i32* %x, align 4 -; CHECK: %[[t:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]]) -; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4 -; CHECK-NEXT: call void @always_throws() -; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4 ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll index 0ea889a664979..8376ecd3d30d2 100644 --- a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll +++ b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e" @@ -6,6 +7,14 @@ declare void @foo(i8*) declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[PTR1:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[PTR2:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @foo(i8* [[PTR2]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false) +; CHECK-NEXT: call void @foo(i8* [[PTR1]]) +; CHECK-NEXT: ret void +; %ptr1 = alloca i8 %ptr2 = alloca i8 call void @foo(i8* %ptr2) @@ -15,8 +24,4 @@ define void @test() { ; Check that the transformation isn't applied if the called function can ; capture the pointer argument (i.e. the nocapture attribute isn't present) - ; CHECK-LABEL: @test( - ; CHECK: call void @foo(i8* %ptr2) - ; CHECK-NEXT: call void @llvm.memcpy - ; CHECK-NEXT: call void @foo(i8* %ptr1) } diff --git a/llvm/test/Transforms/MemCpyOpt/crash.ll b/llvm/test/Transforms/MemCpyOpt/crash.ll index 1fd4d0deae6df..489a1827604b1 100644 --- a/llvm/test/Transforms/MemCpyOpt/crash.ll +++ b/llvm/test/Transforms/MemCpyOpt/crash.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -disable-output +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" @@ -8,6 +9,30 @@ target triple = "armv7-eabi" ; PR4882 define void @test1(%struct.bar* %this) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_BAR:%.*]], %struct.bar* [[THIS:%.*]], i32 0, i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 0, i32 0, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 0, i32 0, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 3, i32 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 3, i32 0, i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 3, i32 0, i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 5 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP16]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP8]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP17]], i8 0, i64 32, i1 false) +; CHECK-NEXT: unreachable +; entry: %0 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 0, i32 0, i32 0 store float 0.000000e+00, float* %0, align 4 @@ -49,6 +74,10 @@ entry: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @test2(i32 %cmd) nounwind { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) [[ATTR1:#.*]] +; CHECK-NEXT: ret void +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 20, i1 false) nounwind call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) nounwind ret void diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll index 6ce1aee338d81..777ba51f38271 100644 --- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64" @@ -6,41 +7,49 @@ target triple = "x86_64-unknown-linux-gnu" %S = type { i8*, i8, i32 } define void @copy(%S* %src, %S* %dst) { -; CHECK-LABEL: copy -; CHECK-NOT: load -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @copy( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @noaliassrc(%S* noalias %src, %S* %dst) { -; CHECK-LABEL: noaliassrc -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliassrc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @noaliasdst(%S* %src, %S* noalias %dst) { -; CHECK-LABEL: noaliasdst -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliasdst( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @destroysrc(%S* %src, %S* %dst) { -; CHECK-LABEL: destroysrc -; CHECK: load %S, %S* %src -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: store %S %1, %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @destroysrc( +; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], %S* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP2]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store [[S]] [[TMP1]], %S* [[DST:%.*]], align 8 +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -48,11 +57,14 @@ define void @destroysrc(%S* %src, %S* %dst) { } define void @destroynoaliassrc(%S* noalias %src, %S* %dst) { -; CHECK-LABEL: destroynoaliassrc -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @destroynoaliassrc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -60,12 +72,14 @@ define void @destroynoaliassrc(%S* noalias %src, %S* %dst) { } define void @copyalias(%S* %src, %S* %dst) { -; CHECK-LABEL: copyalias -; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src -; CHECK-NOT: load -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: store %S [[LOAD]], %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @copyalias( +; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], %S* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: store [[S]] [[TMP1]], %S* [[DST]], align 8 +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src %2 = load %S, %S* %src store %S %1, %S* %dst @@ -76,14 +90,15 @@ define void @copyalias(%S* %src, %S* %dst) { ; If the store address is computed in a complex manner, make ; sure we lift the computation as well if needed and possible. define void @addrproducer(%S* %src, %S* %dst) { -; CHECK-LABEL: addrproducer( -; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8* -; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1 -; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8* -; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8* -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @addrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], %S* [[DST]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST2]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dst2 = getelementptr %S , %S* %dst, i64 1 @@ -92,14 +107,15 @@ define void @addrproducer(%S* %src, %S* %dst) { } define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) { -; CHECK-LABEL: aliasaddrproducer( -; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src -; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false) -; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr -; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex -; CHECK-NEXT: store %S %[[SRC]], %S* %dst2 -; CHECK-NEXT: ret void +; CHECK-LABEL: @aliasaddrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], %S* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP2]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: [[DSTINDEX:%.*]] = load i32, i32* [[DSTIDPTR:%.*]], align 4 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S]], %S* [[DST]], i32 [[DSTINDEX]] +; CHECK-NEXT: store [[S]] [[TMP1]], %S* [[DST2]], align 8 +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dstindex = load i32, i32* %dstidptr @@ -109,16 +125,17 @@ define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) { } define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) { -; CHECK-LABEL: noaliasaddrproducer( -; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8* -; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr -; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1 -; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex -; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8* -; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliasaddrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DSTIDPTR:%.*]], align 4 +; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], %S* [[DST:%.*]], i32 [[DSTINDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[DST2]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %src %2 = load i32, i32* %dstidptr diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll index dde025dac9268..bec6b8855a2be 100644 --- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s ; All the stores in this example should be merged into a single memset. @@ -6,53 +7,74 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i386-apple-darwin8" define void @test1(i8 signext %c) nounwind { -entry: - %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] - %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] - store i8 %c, i8* %tmp, align 1 - %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] - store i8 %c, i8* %tmp5, align 1 - %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] - store i8 %c, i8* %tmp9, align 1 - %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] - store i8 %c, i8* %tmp13, align 1 - %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] - store i8 %c, i8* %tmp17, align 1 - %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] - store i8 %c, i8* %tmp21, align 1 - %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] - store i8 %c, i8* %tmp25, align 1 - %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] - store i8 %c, i8* %tmp29, align 1 - %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] - store i8 %c, i8* %tmp33, align 1 - %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] - store i8 %c, i8* %tmp37, align 1 - %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] - store i8 %c, i8* %tmp41, align 1 - %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] - store i8 %c, i8* %tmp45, align 1 - %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] - store i8 %c, i8* %tmp49, align 1 - %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] - store i8 %c, i8* %tmp53, align 1 - %tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] - store i8 %c, i8* %tmp57, align 1 - %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] - store i8 %c, i8* %tmp61, align 1 - %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16 ; [#uses=1] - store i8 %c, i8* %tmp65, align 1 - %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] - store i8 %c, i8* %tmp69, align 1 - %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] - store i8 %c, i8* %tmp73, align 1 - %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind - ret void ; CHECK-LABEL: @test1( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: store -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = alloca [19 x i8], align 1 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 4 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 5 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 6 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 7 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 8 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 9 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 10 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 11 +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 12 +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 13 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 14 +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 15 +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 17 +; CHECK-NEXT: [[TMP73:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 18 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP]], i8 [[C:%.*]], i64 19, i1 false) +; CHECK-NEXT: [[TMP76:%.*]] = call i32 (...) @bar([19 x i8]* [[X]]) [[ATTR0:#.*]] +; CHECK-NEXT: ret void +; +entry: + %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] + %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] + store i8 %c, i8* %tmp, align 1 + %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] + store i8 %c, i8* %tmp5, align 1 + %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] + store i8 %c, i8* %tmp9, align 1 + %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] + store i8 %c, i8* %tmp13, align 1 + %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] + store i8 %c, i8* %tmp17, align 1 + %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] + store i8 %c, i8* %tmp21, align 1 + %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] + store i8 %c, i8* %tmp25, align 1 + %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] + store i8 %c, i8* %tmp29, align 1 + %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] + store i8 %c, i8* %tmp33, align 1 + %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] + store i8 %c, i8* %tmp37, align 1 + %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] + store i8 %c, i8* %tmp41, align 1 + %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] + store i8 %c, i8* %tmp45, align 1 + %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] + store i8 %c, i8* %tmp49, align 1 + %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] + store i8 %c, i8* %tmp53, align 1 + %tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] + store i8 %c, i8* %tmp57, align 1 + %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] + store i8 %c, i8* %tmp61, align 1 + %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16 ; [#uses=1] + store i8 %c, i8* %tmp65, align 1 + %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] + store i8 %c, i8* %tmp69, align 1 + %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] + store i8 %c, i8* %tmp73, align 1 + %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind + ret void } declare i32 @bar(...) @@ -61,104 +83,150 @@ declare i32 @bar(...) define void @test2() nounwind { -entry: - %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] - %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] - store i8 -1, i8* %tmp20, align 1 - %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] - store i8 -1, i8* %tmp23, align 1 - %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] - store i8 -1, i8* %tmp26, align 1 - %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] - store i8 -1, i8* %tmp29, align 1 - %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] - store i8 -1, i8* %tmp32, align 1 - %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] - store i8 -1, i8* %tmp35, align 1 - %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] - store i8 -1, i8* %tmp38, align 1 - %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] - store i8 -1, i8* %tmp41, align 1 - %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp43, align 2 - %tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp46, align 2 - %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp57, align 2 - %tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp60, align 2 - %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp71, align 2 - %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp74, align 2 - %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp85, align 2 - %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp88, align 2 - %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp99, align 2 - %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp102, align 2 - %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp113, align 2 - %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp116, align 2 - %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp127, align 2 - %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp130, align 2 - %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp141, align 8 - %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp144, align 2 - %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp148, align 2 - %tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp151, align 2 - %tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp162, align 2 - %tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp165, align 2 - %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp176, align 2 - %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp179, align 2 - %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp190, align 2 - %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp193, align 2 - %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp204, align 2 - %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp207, align 2 - %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp218, align 2 - %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp221, align 2 - %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp232, align 2 - %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp235, align 2 - %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp246, align 8 - %tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp249, align 2 - %up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - %left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind - ret void - ; CHECK-LABEL: @test2( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %tmp41, i8 -1, i64 8, i1 false) -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 32, i1 false) -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 0, i64 32, i1 false) -; CHECK-NOT: store -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REF_IDX:%.*]] = alloca [8 x i8], align 1 +; CHECK-NEXT: [[LEFT_MVD:%.*]] = alloca [8 x %struct.MV], align 8 +; CHECK-NEXT: [[UP_MVD:%.*]] = alloca [8 x %struct.MV], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 7 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 6 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 5 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 4 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 1 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 0 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP41]], i8 -1, i64 8, i1 false) +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 1 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 0 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 1 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 0 +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 1 +; CHECK-NEXT: [[TMP85:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 0 +; CHECK-NEXT: [[TMP88:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 1 +; CHECK-NEXT: [[TMP99:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[TMP102:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[TMP113:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 0 +; CHECK-NEXT: [[TMP116:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 1 +; CHECK-NEXT: [[TMP127:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP130:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP141:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP144:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP148:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[TMP141]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP151:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 1 +; CHECK-NEXT: [[TMP162:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 0 +; CHECK-NEXT: [[TMP165:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 1 +; CHECK-NEXT: [[TMP176:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 0 +; CHECK-NEXT: [[TMP179:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 1 +; CHECK-NEXT: [[TMP190:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 0 +; CHECK-NEXT: [[TMP193:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 1 +; CHECK-NEXT: [[TMP204:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[TMP207:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[TMP218:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 0 +; CHECK-NEXT: [[TMP221:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 1 +; CHECK-NEXT: [[TMP232:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP235:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP246:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP249:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[UP_MVD252:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0 +; CHECK-NEXT: [[LEFT_MVD253:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP246]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call void @foo(%struct.MV* [[UP_MVD252]], %struct.MV* [[LEFT_MVD253]], i8* [[TMP41]]) [[ATTR0]] +; CHECK-NEXT: ret void +; +entry: + %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] + %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] + store i8 -1, i8* %tmp20, align 1 + %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] + store i8 -1, i8* %tmp23, align 1 + %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] + store i8 -1, i8* %tmp26, align 1 + %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] + store i8 -1, i8* %tmp29, align 1 + %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] + store i8 -1, i8* %tmp32, align 1 + %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] + store i8 -1, i8* %tmp35, align 1 + %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] + store i8 -1, i8* %tmp38, align 1 + %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] + store i8 -1, i8* %tmp41, align 1 + %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp43, align 2 + %tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp46, align 2 + %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp57, align 2 + %tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp60, align 2 + %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp71, align 2 + %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp74, align 2 + %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp85, align 2 + %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp88, align 2 + %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp99, align 2 + %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp102, align 2 + %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp113, align 2 + %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp116, align 2 + %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp127, align 2 + %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp130, align 2 + %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp141, align 8 + %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp144, align 2 + %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp148, align 2 + %tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp151, align 2 + %tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp162, align 2 + %tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp165, align 2 + %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp176, align 2 + %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp179, align 2 + %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp190, align 2 + %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp193, align 2 + %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp204, align 2 + %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp207, align 2 + %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp218, align 2 + %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp221, align 2 + %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp232, align 2 + %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp235, align 2 + %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp246, align 8 + %tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp249, align 2 + %up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + %left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind + ret void + } declare void @foo(%struct.MV*, %struct.MV*, i8*) @@ -166,6 +234,15 @@ declare void @foo(%struct.MV*, %struct.MV*, i8*) ; Store followed by memset. define void @test3(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 store i32 0, i32* %arrayidx, align 4 @@ -173,28 +250,39 @@ entry: %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ret void -; CHECK-LABEL: @test3( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } ; store followed by memset, different offset scenario define void @test4(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: store i32 0, i32* %P, align 4 %add.ptr = getelementptr inbounds i32, i32* %P, i64 1 %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ret void -; CHECK-LABEL: @test4( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; Memset followed by store. define void @test5(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 %0 = bitcast i32* %add.ptr to i8* @@ -202,13 +290,19 @@ entry: %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 store i32 0, i32* %arrayidx, align 4 ret void -; CHECK-LABEL: @test5( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } ;; Memset followed by memset. define void @test6(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 24, i1 false) +; CHECK-NEXT: ret void +; entry: %0 = bitcast i32* %P to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i1 false) @@ -216,13 +310,20 @@ entry: %1 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i1 false) ret void -; CHECK-LABEL: @test6( -; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i1 false) } ; More aggressive heuristic ; rdar://9892684 define void @test7(i32* nocapture %c) nounwind optsize { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[C]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP5]], i8 -1, i64 20, i1 false) +; CHECK-NEXT: ret void +; store i32 -1, i32* %c, align 4 %1 = getelementptr inbounds i32, i32* %c, i32 1 store i32 -1, i32* %1, align 4 @@ -232,26 +333,33 @@ define void @test7(i32* nocapture %c) nounwind optsize { store i32 -1, i32* %3, align 4 %4 = getelementptr inbounds i32, i32* %c, i32 4 store i32 -1, i32* %4, align 4 -; CHECK-LABEL: @test7( -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %5, i8 -1, i64 20, i1 false) ret void } %struct.test8 = type { [4 x i32] } define void @test8() { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[STRUCT_TEST8:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.test8* [[MEMTMP]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP0]], align 16 +; CHECK-NEXT: ret void +; entry: %memtmp = alloca %struct.test8, align 16 %0 = bitcast %struct.test8* %memtmp to <4 x i32>* store <4 x i32> , <4 x i32>* %0, align 16 ret void -; CHECK-LABEL: @test8( -; CHECK: store <4 x i32> , <4 x i32>* %0, align 16 } @test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16 define void @test9() nounwind { +; CHECK-LABEL: @test9( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false) +; CHECK-NEXT: ret void +; store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2 @@ -269,24 +377,31 @@ define void @test9() nounwind { store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1 ret void -; CHECK-LABEL: @test9( -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false) } ; PR19092 define void @test10(i8* nocapture %P) nounwind { +; CHECK-LABEL: @test10( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 0, i64 42, i1 false) +; CHECK-NEXT: ret void +; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false) tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i1 false) ret void -; CHECK-LABEL: @test10( -; CHECK-NOT: memset -; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false) -; CHECK-NOT: memset -; CHECK: ret void } ; Memset followed by odd store. define void @test11(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test11( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 0 +; CHECK-NEXT: [[ARRAYIDX_CAST:%.*]] = bitcast i32* [[ARRAYIDX]] to i96* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i96* [[ARRAYIDX_CAST]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 1, i64 23, i1 false) +; CHECK-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 3 %0 = bitcast i32* %add.ptr to i8* @@ -295,20 +410,22 @@ entry: %arrayidx.cast = bitcast i32* %arrayidx to i96* store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4 ret void -; CHECK-LABEL: @test11( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 1, i64 23, i1 false) } ; Alignment should be preserved when there is a store with default align define void @test12(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test12( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: store i32 0, i32* %P %add.ptr = getelementptr inbounds i32, i32* %P, i64 1 %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ret void -; CHECK-LABEL: @test12( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } diff --git a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll index b7e3160c7da73..1bab2f65799aa 100644 --- a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll +++ b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; MemCpy optimizations should take place even in presence of invariant.start ; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s @@ -16,30 +17,32 @@ declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be transformed to a memmove. define void @test1(i8* %P, i8* %Q) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[R]], i32 32, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ret void -; CHECK-LABEL: @test1( -; CHECK-NEXT: %memtmp = alloca %0, align 16 -; CHECK-NEXT: %R = bitcast %0* %memtmp to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) -; CHECK-NEXT: ret void } ; The invariant.start intrinsic does not inhibit tranforming the memcpy to a ; memset. define void @test2(i8* %dst1, i8* %dst2, i8 %c) { -; CHECK-LABEL: define void @test2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 128, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[DST1]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2:%.*]], i8 [[C]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 128, i1 false) diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index ad14bdd6df661..f998a194d688b 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -O2 -S | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to @@ -8,10 +9,13 @@ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) { +; CHECK-LABEL: @_ZN4CordC2EOS_( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[ARG1:%.*]], i64 7 +; CHECK-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: ret void +; bb: -; CHECK-LABEL: @_ZN4CordC2EOS_ -; CHECK-NOT: call void @llvm.lifetime.start -; CHECK: ret void %tmp = alloca [8 x i8], align 8 %tmp5 = bitcast [8 x i8]* %tmp to i8* call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp5) diff --git a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll index c3f7a11272815..1c61132eb2d22 100644 --- a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll @@ -35,9 +35,9 @@ define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) { define void @f(%T* %a, %T* %b, %T* %c, %T* %d) { ; CHECK-LABEL: @f( ; CHECK-NEXT: [[VAL:%.*]] = load [[T:%.*]], %T* [[A:%.*]], align 4, !alias.scope !0 -; CHECK-NEXT: store [[T]] { i8 23, i32 23 }, %T* [[B:%.*]], !alias.scope !3 -; CHECK-NEXT: store [[T]] { i8 44, i32 44 }, %T* [[C:%.*]], !alias.scope !6, !noalias !3 -; CHECK-NEXT: store [[T]] %val, %T* [[D:%.*]], !alias.scope !9, !noalias !12 +; CHECK-NEXT: store [[T]] { i8 23, i32 23 }, %T* [[B:%.*]], align 4, !alias.scope !3 +; CHECK-NEXT: store [[T]] { i8 44, i32 44 }, %T* [[C:%.*]], align 4, !alias.scope !6, !noalias !3 +; CHECK-NEXT: store [[T]] [[VAL]], %T* [[D:%.*]], align 4, !alias.scope !9, !noalias !12 ; CHECK-NEXT: ret void ; %val = load %T, %T* %a, !alias.scope !{!10} diff --git a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll index 0f8a70a5511dc..9b0098a499d98 100644 --- a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll +++ b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -basic-aa -memcpyopt | FileCheck %s ; @@ -6,19 +7,22 @@ target triple = "x86_64-apple-darwin10.0.0" %"class.std::auto_ptr" = type { i32* } -; CHECK-LABEL: @_Z3foov( define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp { +; CHECK-LABEL: @_Z3foov( +; CHECK-NEXT: _ZNSt8auto_ptrIiED1Ev.exit: +; CHECK-NEXT: [[TEMP_LVALUE:%.*]] = alloca %"class.std::auto_ptr", align 8 +; CHECK-NEXT: call void @_Z3barv(%"class.std::auto_ptr"* sret [[AGG_RESULT:%.*]]) +; CHECK-NEXT: [[TMP_I_I:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* [[TEMP_LVALUE]], i64 0, i32 0 +; CHECK-NEXT: [[TMP_I_I4:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* [[AGG_RESULT]], i64 0, i32 0 +; CHECK-NEXT: ret void +; _ZNSt8auto_ptrIiED1Ev.exit: %temp.lvalue = alloca %"class.std::auto_ptr", align 8 -; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result) call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue) %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0 -; CHECK-NOT: load %tmp2.i.i = load i32*, i32** %tmp.i.i, align 8 %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0 -; CHECK-NOT: store store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8 -; CHECK: ret void ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll index 1424ca3709cc9..97237a6e68dd3 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -1,89 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind @undef = internal constant i32 undef, align 4 define void @test_undef() nounwind { +; CHECK-LABEL: @test_undef( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 undef, i64 4, i1 false) +; CHECK-NEXT: ret void +; %a = alloca i32, align 4 %i8 = bitcast i32* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false) ret void -; CHECK-LABEL: @test_undef( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } @i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4 define void @test_i32x3() nounwind { +; CHECK-LABEL: @test_i32x3( +; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast [3 x i32]* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 -1, i64 12, i1 false) +; CHECK-NEXT: ret void +; %a = alloca [3 x i32], align 4 %i8 = bitcast [3 x i32]* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false) ret void -; CHECK-LABEL: @test_i32x3( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } @i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4 define void @test_i32x3_undef() nounwind { +; CHECK-LABEL: @test_i32x3_undef( +; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast [3 x i32]* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 -1, i64 12, i1 false) +; CHECK-NEXT: ret void +; %a = alloca [3 x i32], align 4 %i8 = bitcast [3 x i32]* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false) ret void -; CHECK-LABEL: @test_i32x3_undef( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } %struct.bitfield = type { i8, [3 x i8] } @bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4 define void @test_bitfield() nounwind { +; CHECK-LABEL: @test_bitfield( +; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_BITFIELD:%.*]], align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast %struct.bitfield* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 -86, i64 4, i1 false) +; CHECK-NEXT: ret void +; %a = alloca %struct.bitfield, align 4 %i8 = bitcast %struct.bitfield* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false) ret void -; CHECK-LABEL: @test_bitfield( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } @i1x16_zero = internal constant <16 x i1> , align 4 define void @test_i1x16_zero() nounwind { +; CHECK-LABEL: @test_i1x16_zero( +; CHECK-NEXT: [[A:%.*]] = alloca <16 x i1>, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast <16 x i1>* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; %a = alloca <16 x i1>, align 4 %i8 = bitcast <16 x i1>* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false) ret void -; CHECK-LABEL: @test_i1x16_zero( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } ; i1 isn't currently handled. Should it? @i1x16_one = internal constant <16 x i1> , align 4 define void @test_i1x16_one() nounwind { +; CHECK-LABEL: @test_i1x16_one( +; CHECK-NEXT: [[A:%.*]] = alloca <16 x i1>, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast <16 x i1>* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[I8]], i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false) +; CHECK-NEXT: ret void +; %a = alloca <16 x i1>, align 4 %i8 = bitcast <16 x i1>* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false) ret void -; CHECK-LABEL: @test_i1x16_one( -; CHECK-NOT: call void @llvm.memset -; CHECK: call void @llvm.memcpy -; CHECK: ret void } @half = internal constant half 0xH0000, align 4 define void @test_half() nounwind { +; CHECK-LABEL: @test_half( +; CHECK-NEXT: [[A:%.*]] = alloca half, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast half* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 0, i64 2, i1 false) +; CHECK-NEXT: ret void +; %a = alloca half, align 4 %i8 = bitcast half* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false) ret void -; CHECK-LABEL: @test_half( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll index 5cdd1a27258c5..e1dd9c92d4e03 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -6,6 +7,16 @@ target triple = "x86_64-apple-macosx10.8.0" %struct.foo = type { i8, [7 x i8], i32 } define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[BLETCH_SROA_1:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], %struct.foo* [[FOOBIE:%.*]], i64 0, i32 0 +; CHECK-NEXT: store i8 98, i8* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], %struct.foo* [[FOOBIE]], i64 0, i32 1, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[BLETCH_SROA_1]], i64 0, i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO]], %struct.foo* [[FOOBIE]], i64 0, i32 2 +; CHECK-NEXT: store i32 20, i32* [[TMP4]], align 4 +; CHECK-NEXT: ret i32 undef +; %bletch.sroa.1 = alloca [7 x i8], align 1 %1 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0 store i8 98, i8* %1, align 4 @@ -17,28 +28,31 @@ define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable ret i32 undef ; Check that the memcpy is removed. -; CHECK-LABEL: @test1( -; CHECK-NOT: call void @llvm.memcpy } define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[IN:%.*]]) +; CHECK-NEXT: ret void +; call void @llvm.lifetime.start.p0i8(i64 8, i8* %in) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false) ret void ; Check that the memcpy is removed. -; CHECK-LABEL: @test2( -; CHECK-NOT: call void @llvm.memcpy } define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test3( +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[IN:%.*]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[OUT:%.*]], i8* [[IN]], i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.lifetime.start.p0i8(i64 4, i8* %in) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false) ret void ; Check that the memcpy is not removed. -; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memcpy } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 1741da030c2ed..54e5e75fd6e23 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck -enable-var-scope %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -7,6 +8,16 @@ target triple = "i686-apple-darwin9" %1 = type { i32, i32 } define void @test1(%0* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[Z_1:%.*]] +; CHECK-NEXT: call void @ccoshl(%0* sret [[TMP2]], x86_fp80 [[TMP5]], x86_fp80 [[Z_0:%.*]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[TMP219:%.*]] = bitcast %0* [[TMP2]] to i8* +; CHECK-NEXT: [[AGG_RESULT21:%.*]] = bitcast %0* [[AGG_RESULT:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[AGG_RESULT21]], i8* align 16 [[TMP219]], i32 32, i1 false) +; CHECK-NEXT: ret void +; entry: %tmp2 = alloca %0 %memtmp = alloca %0, align 16 @@ -22,11 +33,6 @@ entry: ; Check that one of the memcpy's are removed. ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here. -; CHECK-LABEL: @test1( -; CHECK: call void @ccoshl -; CHECK: call void @llvm.memcpy -; CHECK-NOT: llvm.memcpy -; CHECK: ret void } declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind @@ -35,29 +41,31 @@ declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be related with a memmove. define void @test2(i8* %P, i8* %Q) nounwind { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ret void -; CHECK-LABEL: @test2( -; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* align 16 %Q, i8* align 16 %P -; CHECK-NEXT: ret void } ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be related with a memcpy. define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind { +; CHECK-LABEL: @test2_memcpy( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ret void -; CHECK-LABEL: @test2_memcpy( -; CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 16 %Q, i8* align 16 %P -; CHECK-NEXT: ret void } @@ -66,40 +74,47 @@ define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind { @x = external global %0 define void @test3(%0* noalias sret %agg.result) nounwind { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[AGG_RESULT1:%.*]] = bitcast %0* [[AGG_RESULT:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[AGG_RESULT1]], i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false) +; CHECK-NEXT: ret void +; %x.0 = alloca %0 %x.01 = bitcast %0* %x.0 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x.01, i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false) %agg.result2 = bitcast %0* %agg.result to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false) ret void -; CHECK-LABEL: @test3( -; CHECK-NEXT: %agg.result1 = bitcast -; CHECK-NEXT: call void @llvm.memcpy -; CHECK-NEXT: ret void } ; PR8644 define void @test4(i8 *%P) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: call void @test4a(i8* byval align 1 [[P:%.*]]) +; CHECK-NEXT: ret void +; %A = alloca %1 %a = bitcast %1* %A to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false) call void @test4a(i8* align 1 byval %a) ret void -; CHECK-LABEL: @test4( -; CHECK-NEXT: call void @test4a( } ; Make sure we don't remove the memcpy if the source address space doesn't match the byval argument define void @test4_addrspace(i8 addrspace(1)* %P) { - %A = alloca %1 - %a = bitcast %1* %A to i8* - call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a, i8 addrspace(1)* align 4 %P, i64 8, i1 false) - call void @test4a(i8* align 1 byval %a) - ret void ; CHECK-LABEL: @test4_addrspace( -; CHECK: call void @llvm.memcpy.p0i8.p1i8.i64( -; CHECK-NEXT: call void @test4a( +; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8 +; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 [[A2]], i8 addrspace(1)* align 4 [[P:%.*]], i64 8, i1 false) +; CHECK-NEXT: call void @test4a(i8* byval align 1 [[A2]]) +; CHECK-NEXT: ret void +; + %a1 = alloca %1 + %a2 = bitcast %1* %a1 to i8* + call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a2, i8 addrspace(1)* align 4 %P, i64 8, i1 false) + call void @test4a(i8* align 1 byval %a2) + ret void } declare void @test4a(i8* align 1 byval) @@ -116,6 +131,16 @@ declare void @test5a(%struct.S* align 16 byval) nounwind ssp ; rdar://8713376 - This memcpy can't be eliminated. define i32 @test5(i32 %x) nounwind ssp { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[Y:%.*]] = alloca [[STRUCT_S:%.*]], align 16 +; CHECK-NEXT: [[TMP:%.*]] = bitcast %struct.S* [[Y]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP]], i8* align 16 bitcast (%struct.S* @sS to i8*), i64 32, i1 false) +; CHECK-NEXT: [[A:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[Y]], i64 0, i32 1, i64 0 +; CHECK-NEXT: store i8 4, i8* [[A]], align 1 +; CHECK-NEXT: call void @test5a(%struct.S* byval align 16 [[Y]]) +; CHECK-NEXT: ret i32 0 +; entry: %y = alloca %struct.S, align 16 %tmp = bitcast %struct.S* %y to i8* @@ -124,17 +149,15 @@ entry: store i8 4, i8* %a call void @test5a(%struct.S* align 16 byval %y) ret i32 0 - ; CHECK-LABEL: @test5( - ; CHECK: store i8 4 - ; CHECK: call void @test5a(%struct.S* byval align 16 %y) } ;; Noop memcpy should be zapped. define void @test6(i8 *%P) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: ret void +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %P, i8* align 4 %P, i64 8, i1 false) ret void -; CHECK-LABEL: @test6( -; CHECK-NEXT: ret void } @@ -143,6 +166,11 @@ define void @test6(i8 *%P) { %struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp { +; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @g(%struct.p* byval align 8 [[Q:%.*]]) [[ATTR0]] +; CHECK-NEXT: ret i32 [[CALL]] +; entry: %agg.tmp = alloca %struct.p, align 4 %tmp = bitcast %struct.p* %agg.tmp to i8* @@ -150,8 +178,6 @@ entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp, i8* align 4 %tmp1, i64 48, i1 false) %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind ret i32 %call -; CHECK-LABEL: @test7( -; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[$NUW:#[0-9]+]] } declare i32 @g(%struct.p* align 8 byval) @@ -163,8 +189,9 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) n @test8.str = internal constant [7 x i8] c"ABCDEF\00" define void @test8() { -; CHECK: test8 -; CHECK-NOT: memcpy +; CHECK-LABEL: @test8( +; CHECK-NEXT: ret void +; %A = tail call i8* @malloc(i32 10) %B = getelementptr inbounds i8, i8* %A, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i1 false) @@ -172,7 +199,6 @@ define void @test8() { %D = getelementptr inbounds i8, i8* %C, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i1 false) ret void -; CHECK: ret void } declare noalias i8* @malloc(i32) @@ -181,11 +207,14 @@ declare noalias i8* @malloc(i32) %struct.big = type { [50 x i32] } define void @test9_addrspacecast() nounwind ssp uwtable { -entry: ; CHECK-LABEL: @test9_addrspacecast( -; CHECK: f1 -; CHECK-NOT: memcpy -; CHECK: f2 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4 +; CHECK-NEXT: call void @f1(%struct.big* sret [[B]]) +; CHECK-NEXT: call void @f2(%struct.big* [[B]]) +; CHECK-NEXT: ret void +; +entry: %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 call void @f1(%struct.big* sret %tmp) @@ -197,11 +226,14 @@ entry: } define void @test9() nounwind ssp uwtable { +; CHECK-LABEL: @test9( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4 +; CHECK-NEXT: call void @f1(%struct.big* sret [[B]]) +; CHECK-NEXT: call void @f2(%struct.big* [[B]]) +; CHECK-NEXT: ret void +; entry: -; CHECK: test9 -; CHECK: f1 -; CHECK-NOT: memcpy -; CHECK: f2 %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 call void @f1(%struct.big* sret %tmp) @@ -220,6 +252,15 @@ entry: declare void @foo(i32* noalias nocapture) define void @test10(%opaque* noalias nocapture sret %x, i32 %y) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[Y:%.*]], i32* [[A]], align 4 +; CHECK-NEXT: call void @foo(i32* noalias nocapture [[A]]) +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[D:%.*]] = bitcast %opaque* [[X:%.*]] to i32* +; CHECK-NEXT: store i32 [[C]], i32* [[D]], align 4 +; CHECK-NEXT: ret void +; %a = alloca i32, align 4 store i32 %y, i32* %a call void @foo(i32* noalias nocapture %a) @@ -231,14 +272,17 @@ define void @test10(%opaque* noalias nocapture sret %x, i32 %y) { ; don't create new addressspacecasts when we don't know they're safe for the target define void @test11([20 x i32] addrspace(1)* nocapture dereferenceable(80) %P) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[B:%.*]] = bitcast [20 x i32] addrspace(1)* [[P:%.*]] to i8 addrspace(1)* +; CHECK-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 [[B]], i8 0, i64 80, i1 false) +; CHECK-NEXT: ret void +; %A = alloca [20 x i32], align 4 %a = bitcast [20 x i32]* %A to i8* %b = bitcast [20 x i32] addrspace(1)* %P to i8 addrspace(1)* call void @llvm.memset.p0i8.i64(i8* align 4 %a, i8 0, i64 80, i1 false) call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* align 4 %b, i8* align 4 %a, i64 80, i1 false) ret void -; CHECK-LABEL: @test11( -; CHECK-NOT: addrspacecast } declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @@ -247,7 +291,7 @@ declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocaptur declare void @f1(%struct.big* nocapture sret) declare void @f2(%struct.big*) -; CHECK: attributes [[$NUW]] = { nounwind } +; CHECK: attributes [[ATTR0]] = { nounwind } ; CHECK: attributes #1 = { argmemonly nounwind willreturn } ; CHECK: attributes #2 = { nounwind ssp } ; CHECK: attributes #3 = { nounwind ssp uwtable } diff --git a/llvm/test/Transforms/MemCpyOpt/memmove.ll b/llvm/test/Transforms/MemCpyOpt/memmove.ll index d152cfb63f2b7..4a75cfe6a0460 100644 --- a/llvm/test/Transforms/MemCpyOpt/memmove.ll +++ b/llvm/test/Transforms/MemCpyOpt/memmove.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s ; These memmoves should get optimized to memcpys. @@ -7,9 +8,15 @@ target triple = "x86_64-apple-darwin9.0" declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define i8* @test1(i8* nocapture %src) nounwind { -entry: ; CHECK-LABEL: @test1( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MALLOCCALL:%.*]] = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) +; CHECK-NEXT: [[CALL3:%.*]] = bitcast i8* [[MALLOCCALL]] to [13 x i8]* +; CHECK-NEXT: [[CALL3_SUB:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* [[CALL3]], i64 0, i64 0 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL3_SUB]], i8* [[SRC:%.*]], i64 13, i1 false) +; CHECK-NEXT: ret i8* [[CALL3_SUB]] +; +entry: %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) %call3 = bitcast i8* %malloccall to [13 x i8]* @@ -21,9 +28,13 @@ declare noalias i8* @malloc(i32) define void @test2(i8* %P) nounwind { -entry: ; CHECK-LABEL: @test2( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 16 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[ADD_PTR]], i64 16, i1 false) +; CHECK-NEXT: ret void +; +entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i1 false) ret void @@ -31,9 +42,13 @@ entry: ; This cannot be optimize because the src/dst really do overlap. define void @test3(i8* %P) nounwind { -entry: ; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memmove +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 16 +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[ADD_PTR]], i64 17, i1 false) +; CHECK-NEXT: ret void +; +entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false) ret void diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll index 7ee0682ed2295..52ac35ba5da53 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll @@ -130,7 +130,7 @@ define void @test_write_between(i8* %result) { ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false) -; CHECK-NEXT: store i8 -1, i8* [[B]] +; CHECK-NEXT: store i8 -1, i8* [[B]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void ; @@ -148,7 +148,7 @@ define void @test_write_before_memset_in_memset_region(i8* %result) { ; CHECK-LABEL: @test_write_before_memset_in_memset_region( ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* -; CHECK-NEXT: store i8 -1, i8* [[B]] +; CHECK-NEXT: store i8 -1, i8* [[B]], align 1 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void @@ -168,7 +168,7 @@ define void @test_write_before_memset_in_memcpy_region(i8* %result) { ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 2 -; CHECK-NEXT: store i32 -1, i32* [[C]] +; CHECK-NEXT: store i32 -1, i32* [[C]], align 4 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void @@ -189,7 +189,7 @@ define void @test_write_before_memset_in_both_regions(i8* %result) { ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 1 -; CHECK-NEXT: store i32 -1, i32* [[C]] +; CHECK-NEXT: store i32 -1, i32* [[C]], align 4 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 10, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll index 651ac3194a157..758a093a3b65a 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -1,126 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -memcpyopt -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: define void @test -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i32_i64 -; CHECK: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i64 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i32_i64( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[DST_SIZE:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP1]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP5]], i8 [[C:%.*]], i64 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i128_i32 -; CHECK: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i128 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i128 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i128 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 [[SRCSIZE]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i128_i32(i8* %dst, i8* %src, i128 %dst_size, i32 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i128_i32( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[SRC_SIZE:%.*]] to i128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[DST_SIZE:%.*]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i128 [[DST_SIZE]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i128 [[TMP1]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[TMP5]], i8 [[C:%.*]], i128 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DST]], i8* [[SRC:%.*]], i32 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i32_i128 -; CHECK: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i128 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i128 [[DSTSIZE]], %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i128 [[DSTSIZE]], %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i32_i128(i8* %dst, i8* %src, i32 %dst_size, i128 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i32_i128( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[DST_SIZE:%.*]] to i128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[TMP1]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i128 [[TMP1]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i128 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[TMP5]], i8 [[C:%.*]], i128 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* [[DST]], i8* [[SRC:%.*]], i128 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i64_i32 -; CHECK: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i64 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 [[SRCSIZE]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i64_i32( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[SRC_SIZE:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST_SIZE]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[TMP1]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP5]], i8 [[C:%.*]], i64 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DST]], i8* [[SRC:%.*]], i32 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_align_same -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false) define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_same( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], 80 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], 80 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 80 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 80, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i1 false) ret void } -; CHECK-LABEL: define void @test_align_min -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 {{.*}}, i1 false) define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_min( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], 36 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], 36 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 36 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP4]], i8 0, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 36, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i1 false) ret void } -; CHECK-LABEL: define void @test_align_memcpy -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false) define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_memcpy( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], 80 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], 80 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 80 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC:%.*]], i64 80, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 80, i1 false) ret void } -; CHECK-LABEL: define void @test_non_i8_dst_type -; CHECK-NEXT: %dst = bitcast i64* %dst_pi64 to i8* -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test_non_i8_dst_type( +; CHECK-NEXT: [[DST:%.*]] = bitcast i64* [[DST_PI64:%.*]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; %dst = bitcast i64* %dst_pi64 to i8* call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_dst -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_different_dst( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST:%.*]], i8 0, i64 [[DST_SIZE:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[SRC:%.*]], i64 [[SRC_SIZE:%.*]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i1 false) ret void @@ -128,12 +157,13 @@ define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i6 ; Make sure we also take into account dependencies on the destination. -; CHECK-LABEL: define i8 @test_intermediate_read -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false) -; CHECK-NEXT: %r = load i8, i8* %a -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i1 false) -; CHECK-NEXT: ret i8 %r define i8 @test_intermediate_read(i8* %a, i8* %b) #0 { +; CHECK-LABEL: @test_intermediate_read( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A:%.*]], i8 0, i64 64, i1 false) +; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[A]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A]], i8* [[B:%.*]], i64 24, i1 false) +; CHECK-NEXT: ret i8 [[R]] +; call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false) %r = load i8, i8* %a call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i1 false) @@ -142,15 +172,16 @@ define i8 @test_intermediate_read(i8* %a, i8* %b) #0 { %struct = type { [8 x i8], [8 x i8] } -; CHECK-LABEL: define void @test_intermediate_write -; CHECK-NEXT: %a = alloca %struct -; CHECK-NEXT: %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 -; CHECK-NEXT: %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i1 false) -; CHECK-NEXT: store i8 1, i8* %a1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i1 false) -; CHECK-NEXT: ret void define void @test_intermediate_write(i8* %b) #0 { +; CHECK-LABEL: @test_intermediate_write( +; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT:%.*]], align 8 +; CHECK-NEXT: [[A0:%.*]] = getelementptr [[STRUCT]], %struct* [[A]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[A1:%.*]] = getelementptr [[STRUCT]], %struct* [[A]], i32 0, i32 1, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A0]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, i8* [[A1]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A0]], i8* [[B:%.*]], i64 8, i1 false) +; CHECK-NEXT: ret void +; %a = alloca %struct %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll index e36389a128f99..8867c4f810b28 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll @@ -1,73 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: define void @test( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2:%.*]], i8 [[C]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_smaller_memcpy( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false) -; CHECK-NEXT: ret void define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_smaller_memcpy( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST2:%.*]], i8 [[C]], i64 100, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false) ret void } -; CHECK-LABEL: define void @test_smaller_memset( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_smaller_memset( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 100, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[DST1]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_align_memset( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_align_memset( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST2:%.*]], i8 [[C]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i1 false) -; CHECK-NEXT: ret void define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_types( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[DST2:%.*]], i8 [[C]], i32 100, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 %dst1, i8 %c, i32 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_types_2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[DST1:%.*]], i8 [[C:%.*]], i32 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST2:%.*]], i8 [[C]], i64 100, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* align 8 %dst1, i8 %c, i32 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false) ret void } -; CHECK-LABEL: define void @test_different_source_gep( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false) -; CHECK-NEXT: ret void define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_source_gep( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8* [[DST1]], i64 64 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[P]], i64 64, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) ; FIXME: We could optimize this as well. %p = getelementptr i8, i8* %dst1, i64 64 @@ -75,21 +83,23 @@ define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) { ret void } -; CHECK-LABEL: define void @test_variable_size_1( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_variable_size_1( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 [[DST1_SIZE:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[DST1]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_variable_size_2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false) -; CHECK-NEXT: ret void define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) { +; CHECK-LABEL: @test_variable_size_2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[DST1]], i64 [[DST2_SIZE:%.*]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false) ret void diff --git a/llvm/test/Transforms/MemCpyOpt/nontemporal.ll b/llvm/test/Transforms/MemCpyOpt/nontemporal.ll index d9dafcc7b8169..a67aa8cf0007b 100644 --- a/llvm/test/Transforms/MemCpyOpt/nontemporal.ll +++ b/llvm/test/Transforms/MemCpyOpt/nontemporal.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -5,16 +6,25 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Verify that we don't combine nontemporal stores into memset calls. define void @nontemporal_stores_1(<4 x float>* nocapture %dst) { -; CHECK-LABEL: @nontemporal_stores_1 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0 -; CHECK-NEXT: ret void +; CHECK-LABEL: @nontemporal_stores_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[DST:%.*]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 1 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 2 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR2]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 3 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR3]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 4 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR4]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR5:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 5 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR5]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 6 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR6]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR7:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 7 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR7]], align 16, !nontemporal !0 +; CHECK-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 @@ -35,10 +45,13 @@ entry: } define void @nontemporal_stores_2(<4 x float>* nocapture %dst) { -; CHECK-LABEL: @nontemporal_stores_2 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0 -; CHECK-NEXT: ret void +; CHECK-LABEL: @nontemporal_stores_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[DST:%.*]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 1 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; CHECK-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 diff --git a/llvm/test/Transforms/MemCpyOpt/pr29105.ll b/llvm/test/Transforms/MemCpyOpt/pr29105.ll index e9e9b611aef24..e83508606e55a 100644 --- a/llvm/test/Transforms/MemCpyOpt/pr29105.ll +++ b/llvm/test/Transforms/MemCpyOpt/pr29105.ll @@ -1,10 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -instcombine -S %s | FileCheck %s %Foo = type { [2048 x i64] } ; Make sure that all mempcy calls are converted to memset calls, or removed. -; CHECK-LABEL: @baz( -; CHECK-NOT: call void @llvm.memcpy define void @baz() unnamed_addr #0 { +; CHECK-LABEL: @baz( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: [[TMP2:%.*]] = alloca [[FOO:%.*]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %Foo* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16384, i8* nonnull [[TMP0]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(16384) [[TMP0]], i8 0, i64 16384, i1 false) +; CHECK-NEXT: call void @bar(%Foo* noalias nocapture nonnull dereferenceable(16384) [[TMP2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16384, i8* nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; entry-block: %x.sroa.0 = alloca [2048 x i64], align 8 %tmp0 = alloca [2048 x i64], align 8 diff --git a/llvm/test/Transforms/MemCpyOpt/pr37967.ll b/llvm/test/Transforms/MemCpyOpt/pr37967.ll index 6b6a408732736..8a4e88881d0c0 100644 --- a/llvm/test/Transforms/MemCpyOpt/pr37967.ll +++ b/llvm/test/Transforms/MemCpyOpt/pr37967.ll @@ -1,16 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -debugify -memcpyopt -check-debugify -S < %s 2>&1 | FileCheck %s ; CHECK: CheckModuleDebugify: PASS -; CHECK-LABEL: define {{.*}} @_Z3bar3Foo -; CHECK: [[target:%.*]] = load i8*, i8** bitcast (%struct.Foo** @a to i8**), align 8, !dbg -; CHECK: %tmpcast = bitcast i8* [[target]] to %struct.Foo*, !dbg - %struct.Foo = type { i64, i64, i64 } @a = dso_local global %struct.Foo* null, align 8 define dso_local void @_Z3bar3Foo(%struct.Foo* byval(%struct.Foo) align 8 %0) { +; CHECK-LABEL: @_Z3bar3Foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8, [[DBG13:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.Foo* [[AGG_TMP]], [[META9:metadata !.*]], metadata !DIExpression()), [[DBG13]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** bitcast (%struct.Foo** @a to i8**), align 8, [[DBG14:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[TMP1]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG14]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.Foo* [[AGG_TMP]] to i8*, [[DBG15:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[TMP2]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG15]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(24) [[TMP2]], i8* nonnull align 8 dereferenceable(24) [[TMP1]], i64 24, i1 false), [[DBG16:!dbg !.*]] +; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i8* [[TMP1]] to %struct.Foo*, [[DBG16]] +; CHECK-NEXT: call void @_Z3bar3Foo(%struct.Foo* nonnull byval(%struct.Foo) align 8 [[TMPCAST]]), [[DBG17:!dbg !.*]] +; CHECK-NEXT: ret void, [[DBG18:!dbg !.*]] +; entry: %agg.tmp = alloca %struct.Foo, align 8 %1 = load i8*, i8** bitcast (%struct.Foo** @a to i8**), align 8 diff --git a/llvm/test/Transforms/MemCpyOpt/process_store.ll b/llvm/test/Transforms/MemCpyOpt/process_store.ll index e2edef0a94f70..7b647e556b565 100644 --- a/llvm/test/Transforms/MemCpyOpt/process_store.ll +++ b/llvm/test/Transforms/MemCpyOpt/process_store.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -memcpyopt -disable-output +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -memcpyopt | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -10,6 +11,17 @@ declare dso_local i32 @f1() ; Do not crash due to store first in BB. define dso_local void @f2() { +; CHECK-LABEL: @f2( +; CHECK-NEXT: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* @a, align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @f1() +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1]] = load i32, i32* @b, align 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; for.end: %0 = load i32, i32* @b, align 4 ret void @@ -24,6 +36,19 @@ for.body: ; Do not crash due to call not before store in BB. define dso_local void @f3() { +; CHECK-LABEL: @f3( +; CHECK-NEXT: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[T:%.*]] = add i32 [[T2:%.*]], 1 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* @a, align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @f1() +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[T2]] = xor i32 [[T]], 5 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; for.end: %0 = load i32, i32* @b, align 4 ret void diff --git a/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll b/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll index 649d2386f9600..c45ccb9c9abaa 100644 --- a/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll @@ -1,12 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -; CHECK-LABEL: @foo( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %2, i8 0, i64 8, i1 false) - define void @foo(i64* nocapture %P) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[P:%.*]] to i16* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 2 [[TMP2]], i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; entry: %0 = bitcast i64* %P to i16* %arrayidx = getelementptr inbounds i16, i16* %0, i64 1 diff --git a/llvm/test/Transforms/MemCpyOpt/smaller.ll b/llvm/test/Transforms/MemCpyOpt/smaller.ll index 0c82b5201dca5..1aed83fdb4d6f 100644 --- a/llvm/test/Transforms/MemCpyOpt/smaller.ll +++ b/llvm/test/Transforms/MemCpyOpt/smaller.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s ; RUN: opt -passes=memcpyopt -S < %s | FileCheck %s ; rdar://8875553 @@ -5,8 +6,6 @@ ; Memcpyopt shouldn't optimize the second memcpy using the first ; because the first has a smaller size. -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp, i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false) - target datalayout = "e-p:32:32:32" %struct.s = type { [11 x i8], i32 } @@ -18,6 +17,16 @@ declare void @check(%struct.s* byval %p) nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @foo() nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +; CHECK-NEXT: store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* align 1 getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i1 false) +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[AGG_TMP]], i32 0, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP]], i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false) +; CHECK-NEXT: call void @check(%struct.s* byval [[AGG_TMP]]) +; CHECK-NEXT: ret void +; entry: %agg.tmp = alloca %struct.s, align 4 store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 diff --git a/llvm/test/Transforms/MemCpyOpt/sret.ll b/llvm/test/Transforms/MemCpyOpt/sret.ll index af625127f56ba..f5ffbeaf239f3 100644 --- a/llvm/test/Transforms/MemCpyOpt/sret.ll +++ b/llvm/test/Transforms/MemCpyOpt/sret.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -S | not grep "call.*memcpy" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -6,6 +7,24 @@ target triple = "i686-apple-darwin9" %0 = type { x86_fp80, x86_fp80 } define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind { +; CHECK-LABEL: @ccosl( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IZ:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[TMP0]], %0* [[Z:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load x86_fp80, x86_fp80* [[TMP1]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[TMP0]], %0* [[IZ]], i32 0, i32 1 +; CHECK-NEXT: [[REAL:%.*]] = getelementptr [[TMP0]], %0* [[IZ]], i32 0, i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[TMP0]], %0* [[Z]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = load x86_fp80, x86_fp80* [[TMP7]], align 16 +; CHECK-NEXT: store x86_fp80 [[TMP3]], x86_fp80* [[REAL]], align 16 +; CHECK-NEXT: store x86_fp80 [[TMP8]], x86_fp80* [[TMP4]], align 16 +; CHECK-NEXT: call void @ccoshl(%0* noalias sret [[AGG_RESULT:%.*]], %0* byval align 8 [[IZ]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[MEMTMP14:%.*]] = bitcast %0* [[MEMTMP]] to i8* +; CHECK-NEXT: [[AGG_RESULT15:%.*]] = bitcast %0* [[AGG_RESULT]] to i8* +; CHECK-NEXT: ret void +; entry: %iz = alloca %0 %memtmp = alloca %0, align 16 diff --git a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll index 4bead3381ccda..6f7a7c898dd9c 100644 --- a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll +++ b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s | FileCheck %s ; PR40118: BasicAA didn't realize that stackrestore ends the lifetime of @@ -14,6 +15,20 @@ target triple = "i686-unknown-windows-msvc19.14.26433" ; a call to @external. define i32 @test_norestore(i32 %n) { +; CHECK-LABEL: @test_norestore( +; CHECK-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0 +; CHECK-NEXT: [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) +; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 9 +; CHECK-NEXT: store i8 0, i8* [[P10]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false) +; CHECK-NEXT: call void @external() +; CHECK-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[HEAP]], i8* [[P]], i32 9, i1 false) +; CHECK-NEXT: call void @useit(i8* [[HEAP]]) +; CHECK-NEXT: ret i32 0 +; %tmpmem = alloca [10 x i8], align 4 %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0 @@ -33,15 +48,25 @@ define i32 @test_norestore(i32 %n) { ret i32 0 } -; CHECK-LABEL: define i32 @test_norestore(i32 %n) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %p, i32 9, i1 false) - - ; Do not propagate memcpy from %p across the stackrestore. define i32 @test_stackrestore() { +; CHECK-LABEL: @test_stackrestore( +; CHECK-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0 +; CHECK-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; CHECK-NEXT: [[ARGMEM:%.*]] = alloca inalloca [10 x i8], align 4 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[ARGMEM]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) +; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[ARGMEM]], i32 0, i32 9 +; CHECK-NEXT: store i8 0, i8* [[P10]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false) +; CHECK-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; CHECK-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[HEAP]], i8* [[TMP]], i32 9, i1 false) +; CHECK-NEXT: call void @useit(i8* [[HEAP]]) +; CHECK-NEXT: ret i32 0 +; %tmpmem = alloca [10 x i8], align 4 %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0 %inalloca.save = tail call i8* @llvm.stacksave() @@ -61,11 +86,6 @@ define i32 @test_stackrestore() { ret i32 0 } -; CHECK-LABEL: define i32 @test_stackrestore() -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false) - declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) declare i8* @llvm.stacksave() declare void @llvm.stackrestore(i8*) diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll index f75b63edef359..81d3da0966d92 100644 --- a/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll +++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll @@ -65,7 +65,7 @@ define void @vector_fixed_length_nonzero(<16 x i8>* %p) { define void @vector_scalable_nonzero(* %p) { ; CHECK-LABEL: @vector_scalable_nonzero( -; CHECK-NEXT: store zeroinitializer, * [[P:%.*]] +; CHECK-NEXT: store zeroinitializer, * [[P:%.*]], align 16 ; CHECK-NEXT: ret void ; store zeroinitializer, * %p diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll index 59ed892b60ee0..51651e73e2bc6 100644 --- a/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll @@ -1,8 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" define i8* @foo(i8* returned %0, i32 %1, i64 %2) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP0:%.*]], i64 [[TMP2:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 -32 +; CHECK-NEXT: [[VV:%.*]] = trunc i32 [[TMP1:%.*]] to i8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 5 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 6 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 7 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 9 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 10 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 11 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 12 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 13 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 14 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 15 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 16 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 1 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 2 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 3 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 5 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 6 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 7 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 9 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 11 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 12 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 13 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 14 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 15 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[VV]], i64 32, i1 false) +; CHECK-NEXT: ret i8* [[TMP0]] +; entry: %3 = getelementptr inbounds i8, i8* %0, i64 %2 %4 = getelementptr inbounds i8, i8* %3, i64 -32 @@ -71,7 +111,5 @@ entry: %35 = getelementptr inbounds i8, i8* %20, i64 15 store i8 %vv, i8* %35, align 1 ret i8* %0 -; CHECK-LABEL: @foo -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %4, i8 %vv, i64 32, i1 false) } diff --git a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll index 256bd8518dc19..952a57796f87b 100644 --- a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll @@ -9,9 +9,9 @@ define void @foo(i8* %p) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[P:%.*]] to * ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr , * [[A]], i64 0 -; CHECK-NEXT: store zeroinitializer, * [[TMP0]] +; CHECK-NEXT: store zeroinitializer, * [[TMP0]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[A]], i64 1 -; CHECK-NEXT: store zeroinitializer, * [[TMP1]] +; CHECK-NEXT: store zeroinitializer, * [[TMP1]], align 16 ; CHECK-NEXT: ret void ; %a = bitcast i8* %p to * From 0867a9e85ace8ed0b11f6a7fc4c9e4bb1606263b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Oct 2020 17:36:22 +0100 Subject: [PATCH 396/544] [VPlan] Use isa<> instead of directly checking VPRecipeID (NFC). getVPRecipeID is intended to be only used in `classof` helpers. Instead of checking it directly, use isa<> with the correct recipe type. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 73ac508c389a6..d54a890a3ce20 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7889,11 +7889,11 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( unsigned FirstOpId; if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { - assert(WidenRecipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC && + assert(isa(WidenRecipe) && "Expected to replace a VPWidenSelectSC"); FirstOpId = 1; } else { - assert(WidenRecipe->getVPRecipeID() == VPRecipeBase::VPWidenSC && + assert(isa(WidenRecipe) && "Expected to replace a VPWidenSC"); FirstOpId = 0; } @@ -7910,7 +7910,7 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( Kind == RecurrenceDescriptor::RK_FloatMinMax) { VPRecipeBase *CompareRecipe = RecipeBuilder.getRecipe(cast(R->getOperand(0))); - assert(CompareRecipe->getVPRecipeID() == VPRecipeBase::VPWidenSC && + assert(isa(CompareRecipe) && "Expected to replace a VPWidenSC"); CompareRecipe->eraseFromParent(); } From 07c112574a324318a02ef29901a0d5aa1fd95144 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 2 Oct 2020 09:53:30 -0700 Subject: [PATCH 397/544] [lldb] Fix bug in fallback logic for finding the resource directory. Both of the if-clauses modify the raw_path variable and only one of them was resetting the variable for the fallback. Avoid future bugs like that by always resetting the variable. Differential revision: https://reviews.llvm.org/D88704 --- lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp index 8abb7e4205757..b76fa6fbf690c 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp @@ -137,14 +137,12 @@ bool lldb_private::ComputeClangResourceDirectory(FileSpec &lldb_shlib_spec, FileSystem::Instance().Resolve(file_spec); return true; } - raw_path = lldb_shlib_spec.GetPath(); } - raw_path.resize(rev_it - r_end); - } else { - raw_path.resize(rev_it - r_end); } // Fall back to the Clang resource directory inside the framework. + raw_path = lldb_shlib_spec.GetPath(); + raw_path.resize(rev_it - r_end); raw_path.append("LLDB.framework/Resources/Clang"); file_spec.GetDirectory().SetString(raw_path.c_str()); FileSystem::Instance().Resolve(file_spec); From aac2de1b1af07448483f8cdb3a588b9504def9ac Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 21 Sep 2020 17:08:53 -0400 Subject: [PATCH 398/544] [libc++] Remove unnecessary usage of in the test suite Tests should strive to be as minimal as possible, since it makes them relevant on platforms where does not work. --- libcxx/test/support/private_constructor.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/libcxx/test/support/private_constructor.h b/libcxx/test/support/private_constructor.h index 69411a8d9ad78..24f540c6a7fdc 100644 --- a/libcxx/test/support/private_constructor.h +++ b/libcxx/test/support/private_constructor.h @@ -6,10 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef __PRIVATE_CONSTRUCTOR__H -#define __PRIVATE_CONSTRUCTOR__H - -#include +#ifndef TEST_SUPPORT_PRIVATE_CONSTRUCTOR_H +#define TEST_SUPPORT_PRIVATE_CONSTRUCTOR_H struct PrivateConstructor { @@ -25,6 +23,4 @@ bool operator < ( const PrivateConstructor &lhs, const PrivateConstructor &rhs ) bool operator < ( const PrivateConstructor &lhs, int rhs ) { return lhs.get() < rhs; } bool operator < ( int lhs, const PrivateConstructor &rhs ) { return lhs < rhs.get(); } -std::ostream & operator << ( std::ostream &os, const PrivateConstructor &foo ) { return os << foo.get (); } - -#endif +#endif // TEST_SUPPORT_PRIVATE_CONSTRUCTOR_H From d0dd7cadbd1f8fd57ac09dc9bb9c853ff0329988 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 18:04:56 +0100 Subject: [PATCH 399/544] [InstCombine] Add trunc(bswap(trunc/zext(x))) vector tests --- llvm/test/Transforms/InstCombine/bswap.ll | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index af9350d1c4e0f..d6f0792504887 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -379,6 +379,24 @@ define i16 @test10(i32 %a) { ret i16 %conv } +define <2 x i16> @test10_vector(<2 x i32> %a) { +; CHECK-LABEL: @test10_vector( +; CHECK-NEXT: [[SHR1:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[SHR1]], +; CHECK-NEXT: [[AND2:%.*]] = shl <2 x i32> [[A]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[AND1]], [[AND2]] +; CHECK-NEXT: [[CONV:%.*]] = trunc <2 x i32> [[OR]] to <2 x i16> +; CHECK-NEXT: ret <2 x i16> [[CONV]] +; + %shr1 = lshr <2 x i32> %a, + %and1 = and <2 x i32> %shr1, + %and2 = shl <2 x i32> %a, + %shl1 = and <2 x i32> %and2, + %or = or <2 x i32> %and1, %shl1 + %conv = trunc <2 x i32> %or to <2 x i16> + ret <2 x i16> %conv +} + define i64 @PR39793_bswap_u64_as_u32(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u32( ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 @@ -437,6 +455,23 @@ define i64 @PR39793_bswap_u64_as_u16(i64 %0) { ret i64 %6 } +define <2 x i64> @PR39793_bswap_u64_as_u16_vector(<2 x i64> %0) { +; CHECK-LABEL: @PR39793_bswap_u64_as_u16_vector( +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP3]], [[TMP5]] +; CHECK-NEXT: ret <2 x i64> [[TMP6]] +; + %2 = lshr <2 x i64> %0, + %3 = and <2 x i64> %2, + %4 = shl <2 x i64> %0, + %5 = and <2 x i64> %4, + %6 = or <2 x i64> %3, %5 + ret <2 x i64> %6 +} + define i8 @PR39793_bswap_u64_as_u16_trunc(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u16_trunc( ; CHECK-NEXT: [[REV1:%.*]] = lshr i64 [[TMP0:%.*]], 8 From d1c8e179d8773f82cdba818dac25667224a9e8d1 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Fri, 2 Oct 2020 10:11:22 -0700 Subject: [PATCH 400/544] [mlir][vector] Add canonicalization patterns for extractMap/insertMap Add basic canonicalization patterns for the extractMap/insertMap to allow them to be folded into Transfer ops. Also mark transferRead as memory read so that it can be removed by dead code. Differential Revision: https://reviews.llvm.org/D88622 --- mlir/include/mlir/Dialect/Vector/VectorOps.td | 2 + mlir/lib/Dialect/Vector/VectorOps.cpp | 8 +++ mlir/lib/Dialect/Vector/VectorTransforms.cpp | 54 ++++++++++++++++++- .../Dialect/Vector/vector-distribution.mlir | 21 ++++++++ .../lib/Transforms/TestVectorTransforms.cpp | 2 + 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 42e947071403f..137e130c45943 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -517,6 +517,8 @@ def Vector_ExtractMapOp : $vector `[` $id `:` $multiplicity `]` attr-dict `:` type($vector) `to` type(results) }]; + + let hasFolder = 1; } def Vector_FMAOp : diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 1a83c556d47bb..663595ce161c4 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -923,6 +923,14 @@ static LogicalResult verify(ExtractMapOp op) { return success(); } +OpFoldResult ExtractMapOp::fold(ArrayRef operands) { + auto insert = vector().getDefiningOp(); + if (insert == nullptr || multiplicity() != insert.multiplicity() || + id() != insert.id()) + return {}; + return insert.vector(); +} + //===----------------------------------------------------------------------===// // BroadcastOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 6a244a454e06d..20b928fb9a81c 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -12,6 +12,7 @@ #include +#include "mlir/Dialect/Affine/EDSC/Builders.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" @@ -2452,6 +2453,55 @@ mlir::vector::distributPointwiseVectorOp(OpBuilder &builder, Operation *op, return ops; } +struct TransferReadExtractPattern + : public OpRewritePattern { + TransferReadExtractPattern(MLIRContext *context) + : OpRewritePattern(context) {} + LogicalResult matchAndRewrite(vector::TransferReadOp read, + PatternRewriter &rewriter) const override { + if (!read.getResult().hasOneUse()) + return failure(); + auto extract = + dyn_cast(*read.getResult().getUsers().begin()); + if (!extract) + return failure(); + edsc::ScopedContext scope(rewriter, read.getLoc()); + using mlir::edsc::op::operator+; + using namespace mlir::edsc::intrinsics; + SmallVector indices(read.indices().begin(), read.indices().end()); + indices.back() = indices.back() + extract.id(); + Value newRead = vector_transfer_read(extract.getType(), read.memref(), + indices, read.permutation_map(), + read.padding(), ArrayAttr()); + newRead = rewriter.create( + read.getLoc(), newRead, extract.id(), extract.multiplicity()); + rewriter.replaceOp(read, newRead); + return success(); + } +}; + +struct TransferWriteInsertPattern + : public OpRewritePattern { + TransferWriteInsertPattern(MLIRContext *context) + : OpRewritePattern(context) {} + LogicalResult matchAndRewrite(vector::TransferWriteOp write, + PatternRewriter &rewriter) const override { + auto insert = write.vector().getDefiningOp(); + if (!insert) + return failure(); + edsc::ScopedContext scope(rewriter, write.getLoc()); + using mlir::edsc::op::operator+; + using namespace mlir::edsc::intrinsics; + SmallVector indices(write.indices().begin(), + write.indices().end()); + indices.back() = indices.back() + insert.id(); + vector_transfer_write(insert.vector(), write.memref(), indices, + write.permutation_map(), ArrayAttr()); + rewriter.eraseOp(write); + return success(); + } +}; + // TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp). // TODO: Add this as DRR pattern. void mlir::vector::populateVectorToVectorTransformationPatterns( @@ -2461,7 +2511,9 @@ void mlir::vector::populateVectorToVectorTransformationPatterns( ShapeCastOpFolder, SplitTransferReadOp, SplitTransferWriteOp, - TupleGetFolderOp>(context); + TupleGetFolderOp, + TransferReadExtractPattern, + TransferWriteInsertPattern>(context); // clang-format on } diff --git a/mlir/test/Dialect/Vector/vector-distribution.mlir b/mlir/test/Dialect/Vector/vector-distribution.mlir index 0216a017d7af0..264e0195b4ab2 100644 --- a/mlir/test/Dialect/Vector/vector-distribution.mlir +++ b/mlir/test/Dialect/Vector/vector-distribution.mlir @@ -11,3 +11,24 @@ func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) %0 = addf %A, %B : vector<32xf32> return %0: vector<32xf32> } + +// CHECK-LABEL: func @vector_add_read_write +// CHECK-SAME: (%[[ID:.*]]: index +// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%{{.*}}], %{{.*}} : memref<32xf32>, vector<1xf32> +// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%{{.*}}], %{{.*}} : memref<32xf32>, vector<1xf32> +// CHECK-NEXT: %[[ADD1:.*]] = addf %[[EXA]], %[[EXB]] : vector<1xf32> +// CHECK-NEXT: %[[EXC:.*]] = vector.transfer_read %{{.*}}[%{{.*}}], %{{.*}} : memref<32xf32>, vector<1xf32> +// CHECK-NEXT: %[[ADD2:.*]] = addf %[[ADD1]], %[[EXC]] : vector<1xf32> +// CHECK-NEXT: vector.transfer_write %[[ADD2]], %{{.*}}[%{{.*}}] : vector<1xf32>, memref<32xf32> +// CHECK-NEXT: return +func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) { + %c0 = constant 0 : index + %cf0 = constant 0.0 : f32 + %a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32> + %b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32> + %acc = addf %a, %b: vector<32xf32> + %c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32> + %d = addf %acc, %c: vector<32xf32> + vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32> + return +} diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index 2ffe10bc16824..c1faf23d85df4 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -129,6 +129,7 @@ struct TestVectorDistributePatterns : public PassWrapper { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); + registry.insert(); } void runOnFunction() override { MLIRContext *ctx = &getContext(); @@ -143,6 +144,7 @@ struct TestVectorDistributePatterns op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp); }); patterns.insert(ctx); + populateVectorToVectorTransformationPatterns(patterns, ctx); applyPatternsAndFoldGreedily(getFunction(), patterns); } }; From 0364721e3ef2edca318e27b9453adf977911dbb1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Oct 2020 18:16:55 +0100 Subject: [PATCH 401/544] Revert rG3d14a1e982ad27 - "[InstCombine] recognizeBSwapOrBitReverseIdiom - support for 'partial' bswap patterns (PR47191)" This reverts commit 3d14a1e982ad27111346471564d575ad5efc6419. This is breaking on some 2stage clang buildbots --- .../InstCombine/InstCombineAndOrXor.cpp | 35 +++-- llvm/lib/Transforms/Utils/Local.cpp | 39 +----- llvm/test/Transforms/InstCombine/bswap.ll | 123 +++++++++++++++--- 3 files changed, 135 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index edb2dc8881c7b..cbc3f5a2532f7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2046,18 +2046,29 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) { Op1 = Ext->getOperand(0); // (A | B) | C and A | (B | C) -> bswap if possible. - bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())); - - // (A >> B) | C and (A << B) | C -> bswap if possible. - bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || - match(Op1, m_LogicalShift(m_Value(), m_Value())); - - // (A & B) | C and A | (B & C) -> bswap if possible. - bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || - match(Op1, m_And(m_Value(), m_Value())); - - if (!OrWithOrs && !OrWithShifts && !OrWithAnds) + bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); + + // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. + bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value())); + + // (A & B) | (C & D) -> bswap if possible. + bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && + match(Op1, m_And(m_Value(), m_Value())); + + // (A << B) | (C & D) -> bswap if possible. + // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a + // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935, + // C2 = 8 for i32). + // This pattern can occur when the operands of the 'or' are not canonicalized + // for some reason (not having only one use, for example). + bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_And(m_Value(), m_Value()))) || + (match(Op0, m_And(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value()))); + + if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh) return nullptr; SmallVector Insts; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0c27d803946e1..0fd0dfa24ce96 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2940,24 +2940,6 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } - // BSWAP - most likely due to us previous matching a partial bswap. - if (match(V, m_BSwap(m_Value(X)))) { - const auto &Res = - collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); - if (!Res) - return Result; - - unsigned ByteWidth = BitWidth / 8; - Result = BitPart(Res->Provider, BitWidth); - for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { - unsigned ByteBitOfs = ByteIdx * 8; - for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) - Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = - Res->Provenance[ByteBitOfs + BitIdx]; - } - return Result; - } - // Funnel 'double' shifts take 3 operands, 2 inputs and the shift // amount (modulo). // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -3050,15 +3032,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. unsigned DemandedBW = DemandedTy->getBitWidth(); - APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; bool OKForBitReverse = MatchBitReversals; for (unsigned BitIdx = 0; (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { - if (BitProvenance[BitIdx] == BitPart::Unset) { - DemandedMask.clearBit(BitIdx); - continue; - } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, DemandedBW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], @@ -3073,6 +3050,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( else return false; + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); Value *Provider = Res->Provider; // We may need to truncate the provider. @@ -3083,19 +3061,12 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( Provider = Trunc; } - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); - Instruction *Result = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(Result); - - if (!DemandedMask.isAllOnesValue()) { - auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); - Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); - InsertedInsts.push_back(Result); - } + auto *CI = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(CI); // We may need to zeroextend back to the result type. - if (ITy != Result->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); + if (ITy != CI->getType()) { + auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index d6f0792504887..aac34178efd46 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -534,8 +534,14 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { define i32 @partial_bswap(i32 %x) { ; CHECK-LABEL: @partial_bswap( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[X3:%.*]] = shl i32 [[X:%.*]], 24 +; CHECK-NEXT: [[A2:%.*]] = shl i32 [[X]], 8 +; CHECK-NEXT: [[X2:%.*]] = and i32 [[A2]], 16711680 +; CHECK-NEXT: [[X32:%.*]] = or i32 [[X3]], [[X2]] +; CHECK-NEXT: [[T1:%.*]] = and i32 [[X]], -65536 +; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[T1]]) +; CHECK-NEXT: [[R:%.*]] = or i32 [[X32]], [[T2]] +; CHECK-NEXT: ret i32 [[R]] ; %x3 = shl i32 %x, 24 %a2 = shl i32 %x, 8 @@ -572,9 +578,10 @@ declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: ret i64 [[TMP4]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -599,9 +606,13 @@ define i64 @bswap_and_mask_1(i64 %0) { define i64 @bswap_and_mask_2(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_2( -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] +; CHECK-NEXT: ret i64 [[TMP7]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -724,8 +735,28 @@ define i32 @funnel_binary(i32 %abcd) { define i64 @PR47191_problem1(i64 %0) { ; CHECK-LABEL: @PR47191_problem1( -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 +; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] +; CHECK-NEXT: ret i64 [[TMP22]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -753,8 +784,28 @@ define i64 @PR47191_problem1(i64 %0) { define i64 @PR47191_problem2(i64 %0) { ; CHECK-LABEL: @PR47191_problem2( -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] +; CHECK-NEXT: ret i64 [[TMP22]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -782,8 +833,28 @@ define i64 @PR47191_problem2(i64 %0) { define i64 @PR47191_problem3(i64 %0) { ; CHECK-LABEL: @PR47191_problem3( -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 +; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] +; CHECK-NEXT: ret i64 [[TMP22]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -811,8 +882,28 @@ define i64 @PR47191_problem3(i64 %0) { define i64 @PR47191_problem4(i64 %0) { ; CHECK-LABEL: @PR47191_problem4( -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) -; CHECK-NEXT: ret i64 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] +; CHECK-NEXT: ret i64 [[TMP22]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 From b1bf24667fc3ec5dc4b541148d0d722ffa28a6df Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 25 Sep 2020 10:59:20 -0700 Subject: [PATCH 402/544] [AlwaysInliner] Update BFI when inlining Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D88324 --- llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 8 ++- .../Inline/prof-update-sample-alwaysinline.ll | 60 +++++++++++++++++++ .../Transforms/Inline/prof-update-sample.ll | 1 - 3 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index a9cf363ec98ff..f3b23ea77bcd9 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -39,7 +40,7 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache); + auto &PSI = MAM.getResult(M); SmallSetVector Calls; bool Changed = false; @@ -67,6 +68,11 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller, *OIC, false, DEBUG_TYPE); + InlineFunctionInfo IFI( + /*cg=*/nullptr, GetAssumptionCache, &PSI, + &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(F)); + InlineResult Res = InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime); assert(Res.isSuccess() && "unexpected failure to inline"); diff --git a/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll new file mode 100644 index 0000000000000..5bb5834faefd6 --- /dev/null +++ b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -passes=always-inline -S | FileCheck %s +; Checks if always-inline updates branch_weights annotation for call instructions. + +declare void @ext(); +declare void @ext1(); +@func = global void ()* null + +; CHECK: define void @callee(i32 %n) #0 !prof ![[ENTRY_COUNT:[0-9]*]] +define void @callee(i32 %n) #0 !prof !15 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false +cond_true: +; ext1 is optimized away, thus not updated. +; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]] + call void @ext1(), !prof !16 + ret void +cond_false: +; ext is cloned and updated. +; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]] + call void @ext(), !prof !16 + %f = load void ()*, void ()** @func +; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] + call void %f(), !prof !18 + ret void +} + +; CHECK: define void @caller() +define void @caller() { +; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]] +; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] + call void @callee(i32 15), !prof !17 + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 2000} +!17 = !{!"branch_weights", i64 400} +!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +attributes #0 = { alwaysinline } +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} +; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} +; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200} +; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12} +; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i64 800} +; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8} diff --git a/llvm/test/Transforms/Inline/prof-update-sample.ll b/llvm/test/Transforms/Inline/prof-update-sample.ll index 4a4471e8e17a8..add861b880f9d 100644 --- a/llvm/test/Transforms/Inline/prof-update-sample.ll +++ b/llvm/test/Transforms/Inline/prof-update-sample.ll @@ -51,7 +51,6 @@ define void @caller() { !16 = !{!"branch_weights", i64 2000} !17 = !{!"branch_weights", i64 400} !18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} -attributes #0 = { alwaysinline } ; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} ; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} ; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200} From 9b8c0b8b465f439226b5d2bd8f71d55436801bd9 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 2 Oct 2020 10:34:51 -0700 Subject: [PATCH 403/544] Revert "[AlwaysInliner] Update BFI when inlining" This reverts commit b1bf24667fc3ec5dc4b541148d0d722ffa28a6df. --- llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 8 +-- .../Inline/prof-update-sample-alwaysinline.ll | 60 ------------------- .../Transforms/Inline/prof-update-sample.ll | 1 + 3 files changed, 2 insertions(+), 67 deletions(-) delete mode 100644 llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index f3b23ea77bcd9..a9cf363ec98ff 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -40,7 +39,7 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - auto &PSI = MAM.getResult(M); + InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache); SmallSetVector Calls; bool Changed = false; @@ -68,11 +67,6 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller, *OIC, false, DEBUG_TYPE); - InlineFunctionInfo IFI( - /*cg=*/nullptr, GetAssumptionCache, &PSI, - &FAM.getResult(*(CB->getCaller())), - &FAM.getResult(F)); - InlineResult Res = InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime); assert(Res.isSuccess() && "unexpected failure to inline"); diff --git a/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll deleted file mode 100644 index 5bb5834faefd6..0000000000000 --- a/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt < %s -passes=always-inline -S | FileCheck %s -; Checks if always-inline updates branch_weights annotation for call instructions. - -declare void @ext(); -declare void @ext1(); -@func = global void ()* null - -; CHECK: define void @callee(i32 %n) #0 !prof ![[ENTRY_COUNT:[0-9]*]] -define void @callee(i32 %n) #0 !prof !15 { - %cond = icmp sle i32 %n, 10 - br i1 %cond, label %cond_true, label %cond_false -cond_true: -; ext1 is optimized away, thus not updated. -; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]] - call void @ext1(), !prof !16 - ret void -cond_false: -; ext is cloned and updated. -; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]] - call void @ext(), !prof !16 - %f = load void ()*, void ()** @func -; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] - call void %f(), !prof !18 - ret void -} - -; CHECK: define void @caller() -define void @caller() { -; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]] -; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] - call void @callee(i32 15), !prof !17 - ret void -} - -!llvm.module.flags = !{!1} -!1 = !{i32 1, !"ProfileSummary", !2} -!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} -!3 = !{!"ProfileFormat", !"SampleProfile"} -!4 = !{!"TotalCount", i64 10000} -!5 = !{!"MaxCount", i64 10} -!6 = !{!"MaxInternalCount", i64 1} -!7 = !{!"MaxFunctionCount", i64 2000} -!8 = !{!"NumCounts", i64 2} -!9 = !{!"NumFunctions", i64 2} -!10 = !{!"DetailedSummary", !11} -!11 = !{!12, !13, !14} -!12 = !{i32 10000, i64 100, i32 1} -!13 = !{i32 999000, i64 100, i32 1} -!14 = !{i32 999999, i64 1, i32 2} -!15 = !{!"function_entry_count", i64 1000} -!16 = !{!"branch_weights", i64 2000} -!17 = !{!"branch_weights", i64 400} -!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} -attributes #0 = { alwaysinline } -; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} -; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} -; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200} -; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12} -; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i64 800} -; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8} diff --git a/llvm/test/Transforms/Inline/prof-update-sample.ll b/llvm/test/Transforms/Inline/prof-update-sample.ll index add861b880f9d..4a4471e8e17a8 100644 --- a/llvm/test/Transforms/Inline/prof-update-sample.ll +++ b/llvm/test/Transforms/Inline/prof-update-sample.ll @@ -51,6 +51,7 @@ define void @caller() { !16 = !{!"branch_weights", i64 2000} !17 = !{!"branch_weights", i64 400} !18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +attributes #0 = { alwaysinline } ; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} ; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} ; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200} From 354ba1cb8006c9126851e1b006f799de52ecb7bc Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 2 Oct 2020 10:31:56 -0700 Subject: [PATCH 404/544] [gn build] Don't define CINDEX_EXPORTS This causes ../../clang/include\clang-c/Platform.h(23,11): warning: 'CINDEX_EXPORTS' macro redefined [-Wmacro-redefined] #define CINDEX_EXPORTS --- llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn index 1078ec8f2f430..19e1c3c1cce7c 100644 --- a/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn @@ -52,7 +52,6 @@ target(libclang_target_type, "libclang") { if (host_os == "win") { defines += [ - "CINDEX_EXPORTS", "_CINDEX_LIB_", ] } From eb55735073d53f7816b9a4080e6f54dfeda5ae50 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 25 Sep 2020 10:59:20 -0700 Subject: [PATCH 405/544] Reland [AlwaysInliner] Update BFI when inlining Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D88324 --- clang/test/CodeGen/lto-newpm-pipeline.c | 2 + llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 8 ++- .../Inline/prof-update-sample-alwaysinline.ll | 60 +++++++++++++++++++ .../Transforms/Inline/prof-update-sample.ll | 1 - 4 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index 9694cef326d50..ad3f076e5d8bf 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -28,6 +28,7 @@ // CHECK-FULL-O0: Starting llvm::Module pass manager run. // CHECK-FULL-O0: Running pass: AlwaysInlinerPass // CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy +// CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass @@ -36,6 +37,7 @@ // CHECK-THIN-O0: Starting llvm::Module pass manager run. // CHECK-THIN-O0: Running pass: AlwaysInlinerPass // CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy +// CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index a9cf363ec98ff..f3b23ea77bcd9 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -39,7 +40,7 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache); + auto &PSI = MAM.getResult(M); SmallSetVector Calls; bool Changed = false; @@ -67,6 +68,11 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller, *OIC, false, DEBUG_TYPE); + InlineFunctionInfo IFI( + /*cg=*/nullptr, GetAssumptionCache, &PSI, + &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(F)); + InlineResult Res = InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime); assert(Res.isSuccess() && "unexpected failure to inline"); diff --git a/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll new file mode 100644 index 0000000000000..5bb5834faefd6 --- /dev/null +++ b/llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -passes=always-inline -S | FileCheck %s +; Checks if always-inline updates branch_weights annotation for call instructions. + +declare void @ext(); +declare void @ext1(); +@func = global void ()* null + +; CHECK: define void @callee(i32 %n) #0 !prof ![[ENTRY_COUNT:[0-9]*]] +define void @callee(i32 %n) #0 !prof !15 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false +cond_true: +; ext1 is optimized away, thus not updated. +; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]] + call void @ext1(), !prof !16 + ret void +cond_false: +; ext is cloned and updated. +; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]] + call void @ext(), !prof !16 + %f = load void ()*, void ()** @func +; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] + call void %f(), !prof !18 + ret void +} + +; CHECK: define void @caller() +define void @caller() { +; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]] +; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] + call void @callee(i32 15), !prof !17 + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 2000} +!17 = !{!"branch_weights", i64 400} +!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +attributes #0 = { alwaysinline } +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} +; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} +; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200} +; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12} +; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i64 800} +; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8} diff --git a/llvm/test/Transforms/Inline/prof-update-sample.ll b/llvm/test/Transforms/Inline/prof-update-sample.ll index 4a4471e8e17a8..add861b880f9d 100644 --- a/llvm/test/Transforms/Inline/prof-update-sample.ll +++ b/llvm/test/Transforms/Inline/prof-update-sample.ll @@ -51,7 +51,6 @@ define void @caller() { !16 = !{!"branch_weights", i64 2000} !17 = !{!"branch_weights", i64 400} !18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} -attributes #0 = { alwaysinline } ; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} ; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} ; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i64 1200} From a8938f3da319f4cc17b80ebab582a6c77efa6705 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 2 Oct 2020 10:20:31 -0700 Subject: [PATCH 406/544] scudo: Simplify AtomicOptions::setFillContentsMode. NFCI. Differential Revision: https://reviews.llvm.org/D88747 --- compiler-rt/lib/scudo/standalone/options.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/options.h b/compiler-rt/lib/scudo/standalone/options.h index 4f387a37f4826..3051e8af4f7af 100644 --- a/compiler-rt/lib/scudo/standalone/options.h +++ b/compiler-rt/lib/scudo/standalone/options.h @@ -54,16 +54,14 @@ struct AtomicOptions { } void setFillContentsMode(FillContentsMode FillContents) { - while (1) { - u32 Opts = atomic_load(&Val, memory_order_relaxed); - u32 NewOpts = Opts; + u32 Opts = atomic_load(&Val, memory_order_relaxed), NewOpts; + do { + NewOpts = Opts; NewOpts &= ~(3U << static_cast(OptionBit::FillContents0of2)); NewOpts |= static_cast(FillContents) << static_cast(OptionBit::FillContents0of2); - if (atomic_compare_exchange_strong(&Val, &Opts, NewOpts, - memory_order_relaxed)) - break; - } + } while (!atomic_compare_exchange_strong(&Val, &Opts, NewOpts, + memory_order_relaxed)); } }; From 7468afe9ca135228f4c5a48f1b061ca57786fad6 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 29 Sep 2020 22:29:26 -0700 Subject: [PATCH 407/544] [DAE] MarkLive in MarkValue(MaybeLive) if any use is live While looping through all args or all return values, we may mark a use of a later iteration as live. Previously when we got to that later value it would ignore that and continue adding to Uses instead of marking it live. For example, when looping through arg#0 and arg#1, MarkValue(arg#0, Live) may cause some use of arg#1 to be live, but MarkValue(arg#1, MaybeLive) will not notice that and continue adding into Uses. Now MarkValue(RA, MaybeLive) will MarkLive(RA) if any use is live. Fixes PR47444. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D88529 --- .../Transforms/IPO/DeadArgumentElimination.h | 1 + .../IPO/DeadArgumentElimination.cpp | 29 +++++++++++------ .../DeadArgElim/preserve-used-ret.ll | 32 +++++++++++++++++++ 3 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/DeadArgElim/preserve-used-ret.ll diff --git a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h index 73797bc10017c..496ceea12bc96 100644 --- a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h +++ b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h @@ -128,6 +128,7 @@ class DeadArgumentEliminationPass Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); void SurveyFunction(const Function &F); + bool IsLive(const RetOrArg &RA); void MarkValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses); void MarkLive(const RetOrArg &RA); diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index af5f72f6b6365..0b763e423fe0e 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -357,7 +357,7 @@ DeadArgumentEliminationPass::Liveness DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { // We're live if our use or its Function is already marked as live. - if (LiveFunctions.count(Use.F) || LiveValues.count(Use)) + if (IsLive(Use)) return Live; // We're maybe live otherwise, but remember that we must become live if @@ -657,10 +657,18 @@ void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L, MarkLive(RA); break; case MaybeLive: - // Note any uses of this value, so this return value can be - // marked live whenever one of the uses becomes live. - for (const auto &MaybeLiveUse : MaybeLiveUses) - Uses.insert(std::make_pair(MaybeLiveUse, RA)); + assert(!IsLive(RA) && "Use is already live!"); + for (const auto &MaybeLiveUse : MaybeLiveUses) { + if (IsLive(MaybeLiveUse)) { + // A use is live, so this value is live. + MarkLive(RA); + break; + } else { + // Note any uses of this value, so this value can be + // marked live whenever one of the uses becomes live. + Uses.insert(std::make_pair(MaybeLiveUse, RA)); + } + } break; } } @@ -686,17 +694,20 @@ void DeadArgumentEliminationPass::MarkLive(const Function &F) { /// mark any values that are used by this value (according to Uses) live as /// well. void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) { - if (LiveFunctions.count(RA.F)) - return; // Function was already marked Live. + if (IsLive(RA)) + return; // Already marked Live. - if (!LiveValues.insert(RA).second) - return; // We were already marked Live. + LiveValues.insert(RA); LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking " << RA.getDescription() << " live\n"); PropagateLiveness(RA); } +bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) { + return LiveFunctions.count(RA.F) || LiveValues.count(RA); +} + /// PropagateLiveness - Given that RA is a live value, propagate it's liveness /// to any other values it uses (according to Uses). void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) { diff --git a/llvm/test/Transforms/DeadArgElim/preserve-used-ret.ll b/llvm/test/Transforms/DeadArgElim/preserve-used-ret.ll new file mode 100644 index 0000000000000..f0c2649fdb393 --- /dev/null +++ b/llvm/test/Transforms/DeadArgElim/preserve-used-ret.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -deadargelim %s | FileCheck %s + +define internal { i64, i64 } @f(i64 %a, i64 %b) { +start: + %0 = insertvalue { i64, i64 } undef, i64 %a, 0 + %1 = insertvalue { i64, i64 } %0, i64 %b, 1 + ret { i64, i64 } %1 +} + +; Check that we don't delete either of g's return values + +; CHECK-LABEL: define internal { i64, i64 } @g(i64 %a, i64 %b) +define internal { i64, i64 } @g(i64 %a, i64 %b) { +start: + %0 = call { i64, i64 } @f(i64 %a, i64 %b) + ret { i64, i64 } %0 +} + +declare dso_local i32 @test(i64, i64) + +define i32 @main(i32 %argc, i8** %argv) { +start: + %x = call { i64, i64 } @g(i64 13, i64 42) + %x.0 = extractvalue { i64, i64 } %x, 0 + %x.1 = extractvalue { i64, i64 } %x, 1 + %z = bitcast i64 %x.0 to i64 + %y = call { i64, i64 } @f(i64 %x.0, i64 %x.1) + %y.1 = extractvalue { i64, i64 } %y, 1 + %0 = call i32 @test(i64 %x.0, i64 %y.1) + ret i32 %0 +} + From 84feca6a84d90c5c0b8ecbcffc68e8e4b1285f32 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 2 Oct 2020 19:16:37 +0200 Subject: [PATCH 408/544] [MemCpyOpt] Add tests from D40802 (NFC) Even though that patch didn't stick, we should retain the test coverage. --- .../MemCpyOpt/memcpy-invoke-memcpy.ll | 76 ++++++++ .../Transforms/MemCpyOpt/merge-into-memset.ll | 42 +++++ llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll | 54 ++++++ .../MemCpyOpt/nonlocal-memcpy-memcpy.ll | 172 ++++++++++++++++++ 4 files changed, 344 insertions(+) create mode 100644 llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll create mode 100644 llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll create mode 100644 llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll create mode 100644 llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll new file mode 100644 index 0000000000000..6a2529d03430b --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -memcpyopt -S | FileCheck %s + +; Test memcpy-memcpy dependencies across invoke edges. + +; Test that memcpyopt works across the non-unwind edge of an invoke. +; TODO: Not supported yet. + +define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @test_normal( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) +; CHECK-NEXT: invoke void @invoke_me() +; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: lpad: +; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: ret void +; CHECK: try.cont: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false) +; CHECK-NEXT: ret void +; +entry: + %temp = alloca i8, i32 64 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %temp, i8* nonnull align 8 %src, i64 64, i1 false) + invoke void @invoke_me() + to label %try.cont unwind label %lpad + +lpad: + landingpad { i8*, i32 } + catch i8* null + ret void + +try.cont: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %temp, i64 64, i1 false) + ret void +} + +; Test that memcpyopt works across the unwind edge of an invoke. +; TODO: Not supported yet. + +define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @test_unwind( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) +; CHECK-NEXT: invoke void @invoke_me() +; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: lpad: +; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false) +; CHECK-NEXT: ret void +; CHECK: try.cont: +; CHECK-NEXT: ret void +; +entry: + %temp = alloca i8, i32 64 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %temp, i8* nonnull align 8 %src, i64 64, i1 false) + invoke void @invoke_me() + to label %try.cont unwind label %lpad + +lpad: + landingpad { i8*, i32 } + catch i8* null + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %temp, i64 64, i1 false) + ret void + +try.cont: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare i32 @__gxx_personality_v0(...) +declare void @invoke_me() readnone diff --git a/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll b/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll new file mode 100644 index 0000000000000..af3fe4155f91e --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -memcpyopt -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Don't delete the memcpy in %if.then, even though it depends on an instruction +; which will be deleted. + +define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, i64 10, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[F:%.*]], i8* nonnull align 8 [[TMP4]], i64 30, i1 false) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %tmp = alloca [50 x i8], align 8 + %tmp4 = bitcast [50 x i8]* %tmp to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1 + call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i1 false) + store i8 0, i8* %tmp4, align 8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i1 false) + br i1 %c, label %if.then, label %exit + +if.then: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %f, i8* nonnull align 8 %tmp4, i64 30, i1 false) + br label %exit + +exit: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) diff --git a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll new file mode 100644 index 0000000000000..eb4a86fe5286a --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -memcpyopt -S | FileCheck %s +; Handle memcpy-memcpy dependencies of differing sizes correctly. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Don't delete the second memcpy, even though there's an earlier +; memcpy with a larger size from the same address. + +define i32 @foo(i1 %z) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [10 x i32], align 4 +; CHECK-NEXT: [[S:%.*]] = alloca [10 x i32], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false) +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1 +; CHECK-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; CHECK-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]] +; CHECK: for.body3.lr.ph: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 17179869180, i1 false) +; CHECK-NEXT: br label [[FOR_INC7_1]] +; CHECK: for.inc7.1: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: ret i32 [[TMP2]] +; +entry: + %a = alloca [10 x i32] + %s = alloca [10 x i32] + %0 = bitcast [10 x i32]* %a to i8* + %1 = bitcast [10 x i32]* %s to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %1, i8 0, i64 40, i1 false) + %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0 + store i32 1, i32* %arrayidx + %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1 + %scevgep7 = bitcast i32* %scevgep to i8* + br i1 %z, label %for.body3.lr.ph, label %for.inc7.1 + +for.body3.lr.ph: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %scevgep7, i64 17179869180, i1 false) + br label %for.inc7.1 + +for.inc7.1: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %scevgep7, i64 4, i1 false) + %2 = load i32, i32* %arrayidx + ret i32 %2 +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) diff --git a/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll new file mode 100644 index 0000000000000..f682b71e8c306 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll @@ -0,0 +1,172 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -memcpyopt -S | FileCheck %s + +; Test whether memcpy-memcpy dependence is optimized across +; basic blocks (conditional branches and invokes). +; TODO: This is not supported yet. + +%struct.s = type { i32, i32 } + +@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 +@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 +@i = external constant i8* + +declare void @qux() +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare void @__cxa_throw(i8*, i8*, i8*) +declare i32 @__gxx_personality_v0(...) +declare i8* @__cxa_begin_catch(i8*) + +; A simple partial redundancy. Test that the second memcpy is optimized +; to copy directly from the original source rather than from the temporary. + +define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) { +; CHECK-LABEL: @wobble( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) +; CHECK-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]] +; CHECK: out: +; CHECK-NEXT: call void @qux() +; CHECK-NEXT: unreachable +; CHECK: more: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false) +; CHECK-NEXT: ret void +; +bb: + %temp = alloca i8, i32 64 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %temp, i8* nonnull align 8%src, i64 64, i1 false) + br i1 %some_condition, label %more, label %out + +out: + call void @qux() + unreachable + +more: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %temp, i64 64, i1 false) + ret void +} + +; A CFG triangle with a partial redundancy targeting an alloca. Test that the +; memcpy inside the triangle is optimized to copy directly from the original +; source rather than from the temporary. + +define i32 @foo(i1 %t3) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 +; CHECK-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) +; CHECK-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8* +; CHECK-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 [[S6]], i64 8, i1 false) +; CHECK-NEXT: br label [[BB7]] +; CHECK: bb7: +; CHECK-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 +; CHECK-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4 +; CHECK-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 +; CHECK-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4 +; CHECK-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]] +; CHECK-NEXT: ret i32 [[T12]] +; +bb: + %s = alloca %struct.s, align 4 + %t = alloca %struct.s, align 4 + %s1 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %s1, i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) + br i1 %t3, label %bb4, label %bb7 + +bb4: ; preds = %bb + %t5 = bitcast %struct.s* %t to i8* + %s6 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %t5, i8* align 4 %s6, i64 8, i1 false) + br label %bb7 + +bb7: ; preds = %bb4, %bb + %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0 + %t9 = load i32, i32* %t8, align 4 + %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1 + %t11 = load i32, i32* %t10, align 4 + %t12 = add i32 %t9, %t11 + ret i32 %t12 +} + +; A CFG diamond with an invoke on one side, and a partially redundant memcpy +; into an alloca on the other. Test that the memcpy inside the diamond is +; optimized to copy ; directly from the original source rather than from the +; temporary. This more complex test represents a relatively common usage +; pattern. + +define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @baz( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 +; CHECK-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) +; CHECK-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]] +; CHECK: bb6: +; CHECK-NEXT: invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) +; CHECK-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]] +; CHECK: bb9: +; CHECK-NEXT: [[T10:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: br label [[BB13:%.*]] +; CHECK: bb13: +; CHECK-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null) +; CHECK-NEXT: br label [[BB23:%.*]] +; CHECK: bb22: +; CHECK-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8* +; CHECK-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 [[S24]], i64 8, i1 false) +; CHECK-NEXT: br label [[BB23]] +; CHECK: bb23: +; CHECK-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 +; CHECK-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4 +; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 +; CHECK-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4 +; CHECK-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]] +; CHECK-NEXT: ret i32 [[T21]] +; CHECK: bb25: +; CHECK-NEXT: unreachable +; +bb: + %s = alloca %struct.s, align 4 + %t = alloca %struct.s, align 4 + %s3 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %s3, i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) + br i1 %t5, label %bb6, label %bb22 + +bb6: ; preds = %bb + invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) + to label %bb25 unwind label %bb9 + +bb9: ; preds = %bb6 + %t10 = landingpad { i8*, i32 } + catch i8* null + br label %bb13 + +bb13: ; preds = %bb9 + %t15 = call i8* @__cxa_begin_catch(i8* null) + br label %bb23 + +bb22: ; preds = %bb + %t23 = bitcast %struct.s* %t to i8* + %s24 = bitcast %struct.s* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %t23, i8* align 4 %s24, i64 8, i1 false) + br label %bb23 + +bb23: ; preds = %bb22, %bb13 + %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0 + %t18 = load i32, i32* %t17, align 4 + %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1 + %t20 = load i32, i32* %t19, align 4 + %t21 = add nsw i32 %t18, %t20 + ret i32 %t21 + +bb25: ; preds = %bb6 + unreachable +} From 1a92de0064bcf9ef605ca9456812adf411e9ee4e Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 2 Oct 2020 14:29:48 -0400 Subject: [PATCH 409/544] [libc++] NFCI: Remove _LIBCPP_EXTERN_TEMPLATE2 This seems to have been added a long time ago as a temporary help for debugging some issue, but it's really the same as _LIBCPP_EXTERN_TEMPLATE. --- libcxx/include/__config | 5 --- libcxx/include/__locale | 12 +++---- libcxx/include/locale | 72 ++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 47 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 1b87a6b439965..1b63573007237 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -904,17 +904,12 @@ typedef unsigned int char32_t; #ifdef _LIBCPP_DISABLE_EXTERN_TEMPLATE #define _LIBCPP_EXTERN_TEMPLATE(...) -#define _LIBCPP_EXTERN_TEMPLATE2(...) #endif #ifndef _LIBCPP_EXTERN_TEMPLATE #define _LIBCPP_EXTERN_TEMPLATE(...) extern template __VA_ARGS__; #endif -#ifndef _LIBCPP_EXTERN_TEMPLATE2 -#define _LIBCPP_EXTERN_TEMPLATE2(...) extern template __VA_ARGS__; -#endif - #ifndef _LIBCPP_EXTERN_TEMPLATE_DEFINE #define _LIBCPP_EXTERN_TEMPLATE_DEFINE(...) template __VA_ARGS__; #endif diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 6d10fa4d3d64a..4721a00337ef1 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -335,8 +335,8 @@ collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) const return static_cast(__h); } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) // template class collate_byname; @@ -1263,10 +1263,10 @@ codecvt_byname<_InternT, _ExternT, _StateT>::~codecvt_byname() { } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) template struct __narrow_to_utf8 diff --git a/libcxx/include/locale b/libcxx/include/locale index 3fe44300227a7..33f53d7916345 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -561,8 +561,8 @@ __num_get<_CharT>::__stage2_float_loop(_CharT __ct, bool& __in_units, char& __ex return 0; } -_LIBCPP_EXTERN_TEMPLATE2(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get) -_LIBCPP_EXTERN_TEMPLATE2(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get) +_LIBCPP_EXTERN_TEMPLATE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get) +_LIBCPP_EXTERN_TEMPLATE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get) template > class _LIBCPP_TEMPLATE_VIS num_get @@ -1099,8 +1099,8 @@ num_get<_CharT, _InputIterator>::do_get(iter_type __b, iter_type __e, return __b; } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get) struct _LIBCPP_TYPE_VIS __num_put_base { @@ -1249,8 +1249,8 @@ __num_put<_CharT>::__widen_and_group_float(char* __nb, char* __np, char* __ne, __op = __ob + (__np - __nb); } -_LIBCPP_EXTERN_TEMPLATE2(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put) -_LIBCPP_EXTERN_TEMPLATE2(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put) +_LIBCPP_EXTERN_TEMPLATE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put) +_LIBCPP_EXTERN_TEMPLATE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put) template > class _LIBCPP_TEMPLATE_VIS num_put @@ -1676,8 +1676,8 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, return __pad_and_output(__s, __o, __op, __oe, __iob, __fl); } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put) template _LIBCPP_HIDDEN @@ -2362,8 +2362,8 @@ time_get<_CharT, _InputIterator>::do_get(iter_type __b, iter_type __e, return __b; } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get) class _LIBCPP_TYPE_VIS __time_get { @@ -2462,8 +2462,8 @@ private: virtual const string_type& __X() const {return this->__X_;} }; -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname) class _LIBCPP_TYPE_VIS __time_put { @@ -2575,8 +2575,8 @@ time_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base&, return _VSTD::copy(__nb, __ne, __s); } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put) template > class _LIBCPP_TEMPLATE_VIS time_put_byname @@ -2596,8 +2596,8 @@ protected: ~time_put_byname() {} }; -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname) // money_base @@ -2663,10 +2663,10 @@ template const bool moneypunct<_CharT, _International>::intl; -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct) // moneypunct_byname @@ -2720,10 +2720,10 @@ template<> _LIBCPP_FUNC_VIS void moneypunct_byname::init(const char* template<> _LIBCPP_FUNC_VIS void moneypunct_byname::init(const char*); template<> _LIBCPP_FUNC_VIS void moneypunct_byname::init(const char*); -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname) // money_get @@ -2779,8 +2779,8 @@ __money_get<_CharT>::__gather_info(bool __intl, const locale& __loc, } } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get) template > class _LIBCPP_TEMPLATE_VIS money_get @@ -3162,8 +3162,8 @@ money_get<_CharT, _InputIterator>::do_get(iter_type __b, iter_type __e, return __b; } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get) // money_put @@ -3337,8 +3337,8 @@ __money_put<_CharT>::__format(char_type* __mb, char_type*& __mi, char_type*& __m __mi = __mb; } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put) template > class _LIBCPP_TEMPLATE_VIS money_put @@ -3490,8 +3490,8 @@ money_put<_CharT, _OutputIterator>::do_put(iter_type __s, bool __intl, return __pad_and_output(__s, __mb, __mi, __me, __iob, __fl); } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put) // messages @@ -3606,8 +3606,8 @@ messages<_CharT>::do_close(catalog __c) const #endif // _LIBCPP_HAS_CATOPEN } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages) template class _LIBCPP_TEMPLATE_VIS messages_byname @@ -3630,8 +3630,8 @@ protected: ~messages_byname() {} }; -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname) template, From 1e020b2a1783b5db54d124923b975626e51ebeb1 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 2 Oct 2020 11:26:22 -0700 Subject: [PATCH 410/544] Update legalizer-info-validation.mir test to test all opcodes. The test doesn't fail if we add opcodes to the end of the opcodes definition list, so we were missing some. --- .../GlobalISel/legalizer-info-validation.mir | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 4d49365a8dabb..63892e4f2ab1f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -1,5 +1,5 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s \ -# RUN: -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK +# R UN: llc -mtriple=aarch64-- -run-pass=legalizer %s \ +# R UN: -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK # RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s -debug-only=legalizer-info \ # RUN: -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,DEBUG @@ -563,6 +563,56 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_ADDRSPACE_CAST (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_BLOCK_ADDR (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: G_JUMP_TABLE (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: G_DYN_STACKALLOC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_STRICT_FADD (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FSUB (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FMUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FDIV (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FMA (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_STRICT_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_READ_REGISTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_WRITE_REGISTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_MEMCPY (opcode {{[0-9]+}}): 3 type indices, 1 imm index +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to 208 +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_MEMSET (opcode {{[0-9]+}}): 3 type indices, 1 imm index +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to 208 +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # CHECK-NOT: ill-defined From 322519ee1276cd7cc5f32b3fe335d0b804c8b8c0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 2 Oct 2020 11:43:17 -0700 Subject: [PATCH 411/544] [llc] Initialize TargetOptions after Triple is available Some targets have different defaults. This patch defers initialization of `TargetOptions` so that a future patch can pass `TargetOptions` to `InitTargetOptionsFromCodeGenFlags` Reviewed By: jasonliu Differential Revision: https://reviews.llvm.org/D88748 --- llvm/tools/llc/llc.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 95f2963ecbd61..98a2735887c02 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -424,14 +424,17 @@ static int compileModule(char **argv, LLVMContext &Context) { case '3': OLvl = CodeGenOpt::Aggressive; break; } - TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - Options.DisableIntegratedAS = NoIntegratedAssembler; - Options.MCOptions.ShowMCEncoding = ShowMCEncoding; - Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory; - Options.MCOptions.AsmVerbose = AsmVerbose; - Options.MCOptions.PreserveAsmComments = PreserveComments; - Options.MCOptions.IASSearchPaths = IncludeDirs; - Options.MCOptions.SplitDwarfFile = SplitDwarfFile; + TargetOptions Options; + auto InitializeOptions = [&](const Triple &TheTriple) { + Options = codegen::InitTargetOptionsFromCodeGenFlags(); + Options.DisableIntegratedAS = NoIntegratedAssembler; + Options.MCOptions.ShowMCEncoding = ShowMCEncoding; + Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory; + Options.MCOptions.AsmVerbose = AsmVerbose; + Options.MCOptions.PreserveAsmComments = PreserveComments; + Options.MCOptions.IASSearchPaths = IncludeDirs; + Options.MCOptions.SplitDwarfFile = SplitDwarfFile; + }; Optional RM = codegen::getExplicitRelocModel(); @@ -466,6 +469,7 @@ static int compileModule(char **argv, LLVMContext &Context) { exit(1); } + InitializeOptions(TheTriple); Target = std::unique_ptr(TheTarget->createTargetMachine( TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, codegen::getExplicitCodeModel(), OLvl)); @@ -510,6 +514,7 @@ static int compileModule(char **argv, LLVMContext &Context) { return 1; } + InitializeOptions(TheTriple); Target = std::unique_ptr(TheTarget->createTargetMachine( TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, codegen::getExplicitCodeModel(), OLvl)); From 66cf68ed46789217a8382bb419a0bda1c4e97650 Mon Sep 17 00:00:00 2001 From: Evgenii Stepanov Date: Tue, 15 Sep 2020 12:49:18 -0700 Subject: [PATCH 412/544] [docs] Update ControlFlowIntegrity.rst. Expand the list of targets that support cfi-icall. Add ThinLTO everywhere LTO is mentioned. AFAIK all CFI features are supported with ThinLTO. Differential Revision: https://reviews.llvm.org/D87717 --- clang/docs/ControlFlowIntegrity.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/docs/ControlFlowIntegrity.rst b/clang/docs/ControlFlowIntegrity.rst index 03e27d3ede890..3f6b3ca6cafbc 100644 --- a/clang/docs/ControlFlowIntegrity.rst +++ b/clang/docs/ControlFlowIntegrity.rst @@ -76,8 +76,8 @@ For example, you can build your program with to use all schemes except for non-virtual member function call and indirect call checking. -Remember that you have to provide ``-flto`` if at least one CFI scheme is -enabled. +Remember that you have to provide ``-flto`` or ``-flto=thin`` if at +least one CFI scheme is enabled. Trapping and Diagnostics ======================== @@ -217,7 +217,8 @@ statically linked into the program or shared library, and calls across shared library boundaries are handled as if the callee was not compiled with ``-fsanitize=cfi-icall``. -This scheme is currently only supported on the x86 and x86_64 architectures. +This scheme is currently supported on a limited set of targets: x86, +x86_64, arm, arch64 and wasm. ``-fsanitize-cfi-icall-generalize-pointers`` -------------------------------------------- @@ -368,7 +369,7 @@ Shared library support Use **-f[no-]sanitize-cfi-cross-dso** to enable the cross-DSO control flow integrity mode, which allows all CFI schemes listed above to apply across DSO boundaries. As in the regular CFI, each DSO must be -built with ``-flto``. +built with ``-flto`` or ``-flto=thin``. Normally, CFI checks will only be performed for classes that have hidden LTO visibility. With this flag enabled, the compiler will emit cross-DSO CFI From 31e820378b8ae4d81e9d206a7dae64ccf4b4c97f Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 2 Oct 2020 15:02:52 -0400 Subject: [PATCH 413/544] [libc++] NFCI: Simplify macro definitions for the debug mode The debug mode always had three possibilities: - _LIBCPP_DEBUG is undefined => no assertions - _LIBCPP_DEBUG == 0 => some assertions - _LIBCPP_DEBUG == 1 => some assertions + iterator checks This was documented that way, however the code did not make this clear at all. The discrepancy between _LIBCPP_DEBUG and _LIBCPP_DEBUG_LEVEL was especially confusing. I reworked how the various macros are defined without changing anything else to make the code clearer. --- libcxx/include/__config | 28 ++++--- libcxx/include/__debug | 38 +++++---- libcxx/include/__hash_table | 86 ++++++++++---------- libcxx/include/iterator | 26 +++--- libcxx/include/list | 150 +++++++++++++++++------------------ libcxx/include/locale | 2 +- libcxx/include/string | 78 +++++++++--------- libcxx/include/unordered_map | 80 +++++++++---------- libcxx/include/unordered_set | 74 ++++++++--------- libcxx/include/vector | 76 +++++++++--------- 10 files changed, 319 insertions(+), 319 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 1b63573007237..b51261132ee8a 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -885,21 +885,23 @@ typedef unsigned int char32_t; # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) #endif // _LIBCPP_HAS_NO_STRONG_ENUMS -#ifdef _LIBCPP_DEBUG -# if _LIBCPP_DEBUG == 0 -# define _LIBCPP_DEBUG_LEVEL 1 -# elif _LIBCPP_DEBUG == 1 -# define _LIBCPP_DEBUG_LEVEL 2 -# else -# error Supported values for _LIBCPP_DEBUG are 0 and 1 -# endif -# if !defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_EXTERN_TEMPLATE(...) -# endif +// _LIBCPP_DEBUG potential values: +// - undefined: No assertions. This is the default. +// - 0: Basic assertions +// - 1: Basic assertions + iterator validity checks. +#if !defined(_LIBCPP_DEBUG) +# define _LIBCPP_DEBUG_LEVEL 0 +#elif _LIBCPP_DEBUG == 0 +# define _LIBCPP_DEBUG_LEVEL 1 +#elif _LIBCPP_DEBUG == 1 +# define _LIBCPP_DEBUG_LEVEL 2 +#else +# error Supported values for _LIBCPP_DEBUG are 0 and 1 #endif -#ifndef _LIBCPP_DEBUG_LEVEL -# define _LIBCPP_DEBUG_LEVEL 0 +// _LIBCPP_DEBUG_LEVEL is always defined to one of [0, 1, 2] at this point +#if _LIBCPP_DEBUG_LEVEL >= 1 +# define _LIBCPP_DISABLE_EXTERN_TEMPLATE #endif #ifdef _LIBCPP_DISABLE_EXTERN_TEMPLATE diff --git a/libcxx/include/__debug b/libcxx/include/__debug index 11367413fccc7..dbf47f6f53073 100644 --- a/libcxx/include/__debug +++ b/libcxx/include/__debug @@ -27,26 +27,24 @@ # include #endif -#if _LIBCPP_DEBUG_LEVEL >= 1 && !defined(_LIBCPP_ASSERT) -# define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : \ - _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) -#endif - -#if _LIBCPP_DEBUG_LEVEL >= 2 -#ifndef _LIBCPP_DEBUG_ASSERT -#define _LIBCPP_DEBUG_ASSERT(x, m) _LIBCPP_ASSERT(x, m) -#endif -#define _LIBCPP_DEBUG_MODE(...) __VA_ARGS__ -#endif - -#ifndef _LIBCPP_ASSERT -# define _LIBCPP_ASSERT(x, m) ((void)0) -#endif -#ifndef _LIBCPP_DEBUG_ASSERT +#if _LIBCPP_DEBUG_LEVEL == 0 +# define _LIBCPP_DEBUG_ASSERT(x, m) ((void)0) +# define _LIBCPP_DEBUG_MODE(...) ((void)0) +# define _LIBCPP_ASSERT_IMPL(x, m) ((void)0) +#elif _LIBCPP_DEBUG_LEVEL == 1 # define _LIBCPP_DEBUG_ASSERT(x, m) ((void)0) +# define _LIBCPP_DEBUG_MODE(...) ((void)0) +# define _LIBCPP_ASSERT_IMPL(x, m) ((x) ? (void)0 : _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) +#elif _LIBCPP_DEBUG_LEVEL == 2 +# define _LIBCPP_DEBUG_ASSERT(x, m) _LIBCPP_ASSERT(x, m) +# define _LIBCPP_DEBUG_MODE(...) __VA_ARGS__ +# define _LIBCPP_ASSERT_IMPL(x, m) ((x) ? (void)0 : _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) +#else +# error _LIBCPP_DEBUG_LEVEL must be one of 0, 1, 2 #endif -#ifndef _LIBCPP_DEBUG_MODE -#define _LIBCPP_DEBUG_MODE(...) ((void)0) + +#if !defined(_LIBCPP_ASSERT) +# define _LIBCPP_ASSERT(x, m) _LIBCPP_ASSERT_IMPL(x, m) #endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -83,7 +81,7 @@ void __libcpp_abort_debug_function(__libcpp_debug_info const&); _LIBCPP_FUNC_VIS bool __libcpp_set_debug_function(__libcpp_debug_function_type __func); -#if _LIBCPP_DEBUG_LEVEL >= 2 || defined(_LIBCPP_BUILDING_LIBRARY) +#if _LIBCPP_DEBUG_LEVEL == 2 || defined(_LIBCPP_BUILDING_LIBRARY) struct _LIBCPP_TYPE_VIS __c_node; @@ -271,7 +269,7 @@ _LIBCPP_FUNC_VIS __libcpp_db* __get_db(); _LIBCPP_FUNC_VIS const __libcpp_db* __get_const_db(); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 || defined(_LIBCPP_BUILDING_LIBRARY) +#endif // _LIBCPP_DEBUG_LEVEL == 2 || defined(_LIBCPP_BUILDING_LIBRARY) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 2d051ee49c385..8836391b2ab7c 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -298,7 +298,7 @@ public: _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_iterator(const __hash_iterator& __i) : __node_(__i.__node_) @@ -322,7 +322,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -364,7 +364,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_iterator(__next_pointer __node, const void* __c) _NOEXCEPT : __node_(__node) @@ -415,7 +415,7 @@ public: _LIBCPP_DEBUG_MODE(__get_db()->__iterator_copy(this, &__x)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_iterator(const __hash_const_iterator& __i) : __node_(__i.__node_) @@ -439,7 +439,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -480,7 +480,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_iterator(__next_pointer __node, const void* __c) _NOEXCEPT : __node_(__node) @@ -521,7 +521,7 @@ public: _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_local_iterator(const __hash_local_iterator& __i) : __node_(__i.__node_), @@ -549,7 +549,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -593,7 +593,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_local_iterator(__next_pointer __node, size_t __bucket, size_t __bucket_count, const void* __c) _NOEXCEPT @@ -662,7 +662,7 @@ public: _LIBCPP_DEBUG_MODE(__get_db()->__iterator_copy(this, &__x)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator(const __hash_const_local_iterator& __i) : __node_(__i.__node_), @@ -690,7 +690,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -734,7 +734,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator(__next_pointer __node, size_t __bucket, size_t __bucket_count, const void* __c) _NOEXCEPT @@ -1295,7 +1295,7 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::begin(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return local_iterator(__bucket_list_[__n], __n, bucket_count(), this); #else return local_iterator(__bucket_list_[__n], __n, bucket_count()); @@ -1308,7 +1308,7 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::end(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return local_iterator(nullptr, __n, bucket_count(), this); #else return local_iterator(nullptr, __n, bucket_count()); @@ -1321,7 +1321,7 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::cbegin(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_local_iterator(__bucket_list_[__n], __n, bucket_count(), this); #else return const_local_iterator(__bucket_list_[__n], __n, bucket_count()); @@ -1334,21 +1334,21 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::cend(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_local_iterator(nullptr, __n, bucket_count(), this); #else return const_local_iterator(nullptr, __n, bucket_count()); #endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const; bool __decrementable(const const_iterator* __i) const; bool __addable(const const_iterator* __i, ptrdiff_t __n) const; bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const; -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 private: void __rehash(size_type __n); @@ -1539,7 +1539,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() #endif __deallocate_node(__p1_.first().__next_); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__erase_c(this); #endif } @@ -1583,7 +1583,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) while (__np != nullptr) { __next_pointer __next = __np->__next_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __p = __c->end_; __p != __c->beg_; ) { @@ -1646,7 +1646,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( __u.__p1_.first().__next_ = nullptr; __u.size() = 0; } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__u); #endif } @@ -1800,7 +1800,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::begin() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__p1_.first().__next_, this); #else return iterator(__p1_.first().__next_); @@ -1812,7 +1812,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(nullptr, this); #else return iterator(nullptr); @@ -1824,7 +1824,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::begin() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(__p1_.first().__next_, this); #else return const_iterator(__p1_.first().__next_); @@ -1836,7 +1836,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(nullptr, this); #else return const_iterator(nullptr); @@ -1945,7 +1945,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __ __existing_node = __nd->__ptr(); __inserted = true; } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return pair(iterator(__existing_node, this), __inserted); #else return pair(iterator(__existing_node), __inserted); @@ -2043,7 +2043,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(__node_pointer __c __next_pointer __pn = __node_insert_multi_prepare(__cp->__hash(), __cp->__value_); __node_insert_multi_perform(__cp, __pn); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__cp->__ptr(), this); #else return iterator(__cp->__ptr()); @@ -2055,7 +2055,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); @@ -2078,7 +2078,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( __cp->__next_ = __np; __pp->__next_ = static_cast<__next_pointer>(__cp); ++size(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(static_cast<__next_pointer>(__cp), this); #else return iterator(static_cast<__next_pointer>(__cp)); @@ -2159,7 +2159,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __inserted = true; } __done: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return pair(iterator(__nd, this), __inserted); #else return pair(iterator(__nd), __inserted); @@ -2197,7 +2197,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( const_iterator __p, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); @@ -2225,7 +2225,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_multi(const_iterator __p, const __container_value_type& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::insert(const_iterator, lvalue) called with an iterator not" " referring to this unordered container"); @@ -2399,9 +2399,9 @@ template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc(); __bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr); @@ -2470,7 +2470,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) { if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__value_, __k)) -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__nd, this); #else return iterator(__nd); @@ -2501,7 +2501,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const { if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__value_, __k)) -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(__nd, this); #else return const_iterator(__nd); @@ -2586,7 +2586,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { __next_pointer __np = __p.__node_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container erase(iterator) called with an iterator not" " referring to this container"); @@ -2606,7 +2606,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, "unodered container::erase(iterator, iterator) called with an iterator not" " referring to this unodered container"); @@ -2620,7 +2620,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, erase(__p); } __next_pointer __np = __last.__node_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator (__np, this); #else return iterator (__np); @@ -2691,7 +2691,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::remove(const_iterator __p) _NOEXCEPT __pn->__next_ = __cn->__next_; __cn->__next_ = nullptr; --size(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __dp = __c->end_; __dp != __c->beg_; ) { @@ -2842,7 +2842,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) if (__u.size() > 0) __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] = __u.__p1_.first().__ptr(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__u); #endif } @@ -2876,7 +2876,7 @@ swap(__hash_table<_Tp, _Hash, _Equal, _Alloc>& __x, __x.swap(__y); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 template bool @@ -2906,7 +2906,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__subscriptable(const const_iterator*, return false; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/iterator b/libcxx/include/iterator index e2910e9fdc2a1..e8e379624ac09 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -1438,7 +1438,7 @@ public: : __i{} #endif { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_i(this); #endif } @@ -1447,11 +1447,11 @@ public: typename enable_if::value>::type* = 0) _NOEXCEPT : __i(__u.base()) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__iterator_copy(this, &__u); #endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const __wrap_iter& __x) : __i(__x.base()) @@ -1476,7 +1476,7 @@ public: #endif _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable iterator"); #endif @@ -1484,7 +1484,7 @@ public: } _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable iterator"); #endif @@ -1492,7 +1492,7 @@ public: } _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator++() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to increment non-incrementable iterator"); #endif @@ -1504,7 +1504,7 @@ public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator--() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), "Attempted to decrement non-decrementable iterator"); #endif @@ -1517,7 +1517,7 @@ public: {__wrap_iter __w(*this); __w += __n; return __w;} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator+=(difference_type __n) _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n), "Attempted to add/subtract iterator outside of valid range"); #endif @@ -1530,7 +1530,7 @@ public: {*this += -__n; return *this;} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator[](difference_type __n) const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n), "Attempted to subscript iterator outside of valid range"); #endif @@ -1540,7 +1540,7 @@ public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG iterator_type base() const _NOEXCEPT {return __i;} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const void* __p, iterator_type __x) : __i(__x) { __get_db()->__insert_ic(this, __p); @@ -1641,7 +1641,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), "Attempted to compare incomparable iterators"); #endif @@ -1719,7 +1719,7 @@ auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -> decltype(__x.base() - __y.base()) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), "Attempted to subtract incompatible iterators"); #endif @@ -1731,7 +1731,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename __wrap_iter<_Iter1>::difference_type operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), "Attempted to subtract incompatible iterators"); #endif diff --git a/libcxx/include/list b/libcxx/include/list index 55b45f1a67d4f..1c085b4e6dfa2 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -293,7 +293,7 @@ class _LIBCPP_TEMPLATE_VIS __list_iterator __link_pointer __ptr_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY explicit __list_iterator(__link_pointer __p, const void* __c) _NOEXCEPT : __ptr_(__p) @@ -320,12 +320,12 @@ public: _LIBCPP_INLINE_VISIBILITY __list_iterator() _NOEXCEPT : __ptr_(nullptr) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_i(this); #endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __list_iterator(const __list_iterator& __p) @@ -351,12 +351,12 @@ public: return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable list::iterator"); #endif @@ -365,7 +365,7 @@ public: _LIBCPP_INLINE_VISIBILITY pointer operator->() const { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable list::iterator"); #endif @@ -375,7 +375,7 @@ public: _LIBCPP_INLINE_VISIBILITY __list_iterator& operator++() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to increment non-incrementable list::iterator"); #endif @@ -388,7 +388,7 @@ public: _LIBCPP_INLINE_VISIBILITY __list_iterator& operator--() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), "Attempted to decrement non-decrementable list::iterator"); #endif @@ -416,7 +416,7 @@ class _LIBCPP_TEMPLATE_VIS __list_const_iterator __link_pointer __ptr_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY explicit __list_const_iterator(__link_pointer __p, const void* __c) _NOEXCEPT : __ptr_(__p) @@ -440,7 +440,7 @@ public: _LIBCPP_INLINE_VISIBILITY __list_const_iterator() _NOEXCEPT : __ptr_(nullptr) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_i(this); #endif } @@ -448,12 +448,12 @@ public: __list_const_iterator(const __list_iterator<_Tp, _VoidPtr>& __p) _NOEXCEPT : __ptr_(__p.__ptr_) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__iterator_copy(this, &__p); #endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __list_const_iterator(const __list_const_iterator& __p) @@ -479,11 +479,11 @@ public: return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable list::const_iterator"); #endif @@ -492,7 +492,7 @@ public: _LIBCPP_INLINE_VISIBILITY pointer operator->() const { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable list::const_iterator"); #endif @@ -502,7 +502,7 @@ public: _LIBCPP_INLINE_VISIBILITY __list_const_iterator& operator++() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to increment non-incrementable list::const_iterator"); #endif @@ -515,7 +515,7 @@ public: _LIBCPP_INLINE_VISIBILITY __list_const_iterator& operator--() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), "Attempted to decrement non-decrementable list::const_iterator"); #endif @@ -614,7 +614,7 @@ protected: _LIBCPP_INLINE_VISIBILITY iterator begin() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__end_.__next_, this); #else return iterator(__end_.__next_); @@ -623,7 +623,7 @@ protected: _LIBCPP_INLINE_VISIBILITY const_iterator begin() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(__end_.__next_, this); #else return const_iterator(__end_.__next_); @@ -632,7 +632,7 @@ protected: _LIBCPP_INLINE_VISIBILITY iterator end() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__end_as_link(), this); #else return iterator(__end_as_link()); @@ -641,7 +641,7 @@ protected: _LIBCPP_INLINE_VISIBILITY const_iterator end() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(__end_as_link(), this); #else return const_iterator(__end_as_link()); @@ -696,7 +696,7 @@ private: _LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); #endif } @@ -741,7 +741,7 @@ inline __list_imp<_Tp, _Alloc>::__list_imp(__node_allocator&& __a) _NOEXCEPT template __list_imp<_Tp, _Alloc>::~__list_imp() { clear(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__erase_c(this); #endif } @@ -795,7 +795,7 @@ __list_imp<_Tp, _Alloc>::swap(__list_imp& __c) else __c.__end_.__prev_->__next_ = __c.__end_.__next_->__prev_ = __c.__end_as_link(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __libcpp_db* __db = __get_db(); __c_node* __cn1 = __db->__find_c_and_lock(this); __c_node* __cn2 = __db->__find_c(&__c); @@ -870,14 +870,14 @@ public: list() _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } _LIBCPP_INLINE_VISIBILITY explicit list(const allocator_type& __a) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1117,14 +1117,14 @@ public: return __hold_pointer(__p, __node_destructor(__na, 1)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const; bool __decrementable(const const_iterator* __i) const; bool __addable(const const_iterator* __i, ptrdiff_t __n) const; bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const; -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 private: _LIBCPP_INLINE_VISIBILITY @@ -1207,7 +1207,7 @@ list<_Tp, _Alloc>::__iterator(size_type __n) template list<_Tp, _Alloc>::list(size_type __n) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __n > 0; --__n) @@ -1222,7 +1222,7 @@ list<_Tp, _Alloc>::list(size_type __n) template list<_Tp, _Alloc>::list(size_type __n, const allocator_type& __a) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __n > 0; --__n) @@ -1233,7 +1233,7 @@ list<_Tp, _Alloc>::list(size_type __n, const allocator_type& __a) : base(__a) template list<_Tp, _Alloc>::list(size_type __n, const value_type& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __n > 0; --__n) @@ -1244,7 +1244,7 @@ template list<_Tp, _Alloc>::list(size_type __n, const value_type& __x, const allocator_type& __a) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __n > 0; --__n) @@ -1256,7 +1256,7 @@ template list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __f != __l; ++__f) @@ -1269,7 +1269,7 @@ list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, const allocator_type& __a, typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __f != __l; ++__f) @@ -1280,7 +1280,7 @@ template list<_Tp, _Alloc>::list(const list& __c) : base(__node_alloc_traits::select_on_container_copy_construction( __c.__node_alloc())) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (const_iterator __i = __c.begin(), __e = __c.end(); __i != __e; ++__i) @@ -1291,7 +1291,7 @@ template list<_Tp, _Alloc>::list(const list& __c, const allocator_type& __a) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (const_iterator __i = __c.begin(), __e = __c.end(); __i != __e; ++__i) @@ -1304,7 +1304,7 @@ template list<_Tp, _Alloc>::list(initializer_list __il, const allocator_type& __a) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (typename initializer_list::const_iterator __i = __il.begin(), @@ -1315,7 +1315,7 @@ list<_Tp, _Alloc>::list(initializer_list __il, const allocator_type& template list<_Tp, _Alloc>::list(initializer_list __il) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (typename initializer_list::const_iterator __i = __il.begin(), @@ -1327,7 +1327,7 @@ template inline list<_Tp, _Alloc>::list(list&& __c) _NOEXCEPT_(is_nothrow_move_constructible<__node_allocator>::value) : base(_VSTD::move(__c.__node_alloc())) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif splice(end(), __c); @@ -1338,7 +1338,7 @@ inline list<_Tp, _Alloc>::list(list&& __c, const allocator_type& __a) : base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__a == __c.get_allocator()) @@ -1415,7 +1415,7 @@ list<_Tp, _Alloc>::assign(_InpIter __f, _InpIter __l, insert(__e, __f, __l); else erase(__i, __e); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); #endif } @@ -1432,7 +1432,7 @@ list<_Tp, _Alloc>::assign(size_type __n, const value_type& __x) insert(__e, __n, __x); else erase(__i, __e); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); #endif } @@ -1449,7 +1449,7 @@ template typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, const value_type& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::insert(iterator, x) called with an iterator not" " referring to this list"); @@ -1459,7 +1459,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, const value_type& __x) __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), __x); __link_nodes(__p.__ptr_, __hold->__as_link(), __hold->__as_link()); ++base::__sz(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__hold.release()->__as_link(), this); #else return iterator(__hold.release()->__as_link()); @@ -1470,7 +1470,7 @@ template typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::insert(iterator, n, x) called with an iterator not" " referring to this list"); @@ -1485,7 +1485,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& _ __hold_pointer __hold = __allocate_node(__na); __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), __x); ++__ds; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __r = iterator(__hold->__as_link(), this); #else __r = iterator(__hold->__as_link()); @@ -1515,7 +1515,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& _ __node_alloc_traits::deallocate(__na, __e.__ptr_->__as_node(), 1); if (__prev == 0) break; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __e = iterator(__prev, this); #else __e = iterator(__prev); @@ -1536,7 +1536,7 @@ typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l, typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::insert(iterator, range) called with an iterator not" " referring to this list"); @@ -1551,7 +1551,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l, __hold_pointer __hold = __allocate_node(__na); __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), *__f); ++__ds; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __r = iterator(__hold.get()->__as_link(), this); #else __r = iterator(__hold.get()->__as_link()); @@ -1581,7 +1581,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l, __node_alloc_traits::deallocate(__na, __e.__ptr_->__as_node(), 1); if (__prev == 0) break; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __e = iterator(__prev, this); #else __e = iterator(__prev); @@ -1695,7 +1695,7 @@ template typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::emplace(const_iterator __p, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::emplace(iterator, args...) called with an iterator not" " referring to this list"); @@ -1707,7 +1707,7 @@ list<_Tp, _Alloc>::emplace(const_iterator __p, _Args&&... __args) __link_nodes(__p.__ptr_, __nl, __nl); ++base::__sz(); __hold.release(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__nl, this); #else return iterator(__nl); @@ -1718,7 +1718,7 @@ template typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, value_type&& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::insert(iterator, x) called with an iterator not" " referring to this list"); @@ -1730,7 +1730,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, value_type&& __x) __link_nodes(__p.__ptr_, __nl, __nl); ++base::__sz(); __hold.release(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__nl, this); #else return iterator(__nl); @@ -1748,7 +1748,7 @@ list<_Tp, _Alloc>::pop_front() __link_pointer __n = base::__end_.__next_; base::__unlink_nodes(__n, __n); --base::__sz(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __p = __c->end_; __p != __c->beg_; ) { @@ -1777,7 +1777,7 @@ list<_Tp, _Alloc>::pop_back() __link_pointer __n = base::__end_.__prev_; base::__unlink_nodes(__n, __n); --base::__sz(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __p = __c->end_; __p != __c->beg_; ) { @@ -1801,7 +1801,7 @@ template typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::erase(const_iterator __p) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::erase(iterator) called with an iterator not" " referring to this list"); @@ -1813,7 +1813,7 @@ list<_Tp, _Alloc>::erase(const_iterator __p) __link_pointer __r = __n->__next_; base::__unlink_nodes(__n, __n); --base::__sz(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __ip = __c->end_; __ip != __c->beg_; ) { @@ -1831,7 +1831,7 @@ list<_Tp, _Alloc>::erase(const_iterator __p) __node_pointer __np = __n->__as_node(); __node_alloc_traits::destroy(__na, _VSTD::addressof(__np->__value_)); __node_alloc_traits::deallocate(__na, __np, 1); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__r, this); #else return iterator(__r); @@ -1842,7 +1842,7 @@ template typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::erase(const_iterator __f, const_iterator __l) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__f) == this, "list::erase(iterator, iterator) called with an iterator not" " referring to this list"); @@ -1859,7 +1859,7 @@ list<_Tp, _Alloc>::erase(const_iterator __f, const_iterator __l) __link_pointer __n = __f.__ptr_; ++__f; --base::__sz(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __p = __c->end_; __p != __c->beg_; ) { @@ -1879,7 +1879,7 @@ list<_Tp, _Alloc>::erase(const_iterator __f, const_iterator __l) __node_alloc_traits::deallocate(__na, __np, 1); } } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__l.__ptr_, this); #else return iterator(__l.__ptr_); @@ -1900,7 +1900,7 @@ list<_Tp, _Alloc>::resize(size_type __n) __hold_pointer __hold = __allocate_node(__na); __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_)); ++__ds; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 iterator __r = iterator(__hold.release()->__as_link(), this); #else iterator __r = iterator(__hold.release()->__as_link()); @@ -1929,7 +1929,7 @@ list<_Tp, _Alloc>::resize(size_type __n) __node_alloc_traits::deallocate(__na, __e.__ptr_->__as_node(), 1); if (__prev == 0) break; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __e = iterator(__prev, this); #else __e = iterator(__prev); @@ -1958,7 +1958,7 @@ list<_Tp, _Alloc>::resize(size_type __n, const value_type& __x) __node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), __x); ++__ds; __link_pointer __nl = __hold.release()->__as_link(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 iterator __r = iterator(__nl, this); #else iterator __r = iterator(__nl); @@ -1987,7 +1987,7 @@ list<_Tp, _Alloc>::resize(size_type __n, const value_type& __x) __node_alloc_traits::deallocate(__na, __e.__ptr_->__as_node(), 1); if (__prev == 0) break; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __e = iterator(__prev, this); #else __e = iterator(__prev); @@ -2007,7 +2007,7 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c) { _LIBCPP_ASSERT(this != &__c, "list::splice(iterator, list) called with this == &list"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::splice(iterator, list) called with an iterator not" " referring to this list"); @@ -2020,7 +2020,7 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c) __link_nodes(__p.__ptr_, __f, __l); base::__sz() += __c.__sz(); __c.__sz() = 0; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 if (&__c != this) { __libcpp_db* __db = __get_db(); __c_node* __cn1 = __db->__find_c_and_lock(this); @@ -2047,7 +2047,7 @@ template void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __i) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::splice(iterator, list, iterator) called with first iterator not" " referring to this list"); @@ -2065,7 +2065,7 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __i) __link_nodes(__p.__ptr_, __f, __f); --__c.__sz(); ++base::__sz(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 if (&__c != this) { __libcpp_db* __db = __get_db(); __c_node* __cn1 = __db->__find_c_and_lock(this); @@ -2092,7 +2092,7 @@ template void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, const_iterator __l) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "list::splice(iterator, list, iterator, iterator) called with first iterator not" " referring to this list"); @@ -2121,7 +2121,7 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, con } base::__unlink_nodes(__first, __last); __link_nodes(__p.__ptr_, __first, __last); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 if (&__c != this) { __libcpp_db* __db = __get_db(); __c_node* __cn1 = __db->__find_c_and_lock(this); @@ -2258,7 +2258,7 @@ list<_Tp, _Alloc>::merge(list& __c, _Comp __comp) ++__f1; } splice(__e1, __c); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __libcpp_db* __db = __get_db(); __c_node* __cn1 = __db->__find_c_and_lock(this); __c_node* __cn2 = __db->__find_c(&__c); @@ -2382,7 +2382,7 @@ list<_Tp, _Alloc>::__invariants() const return size() == _VSTD::distance(begin(), end()); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 template bool @@ -2412,7 +2412,7 @@ list<_Tp, _Alloc>::__subscriptable(const const_iterator*, ptrdiff_t) const return false; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 template inline _LIBCPP_INLINE_VISIBILITY diff --git a/libcxx/include/locale b/libcxx/include/locale index 33f53d7916345..60aab50d17644 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -1427,7 +1427,7 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, return do_put(__s, __iob, __fl, (unsigned long)__v); const numpunct& __np = use_facet >(__iob.getloc()); typedef typename numpunct::string_type string_type; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 string_type __tmp(__v ? __np.truename() : __np.falsename()); string_type __nm = _VSTD::move(__tmp); #else diff --git a/libcxx/include/string b/libcxx/include/string index 2f846eda06c58..b6380da95c64b 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -816,7 +816,7 @@ public: basic_string(const _CharT* __s) : __r_(__default_init_tag(), __default_init_tag()) { _LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*) detected nullptr"); __init(__s, traits_type::length(__s)); -# if _LIBCPP_DEBUG_LEVEL >= 2 +# if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); # endif } @@ -890,7 +890,7 @@ public: _LIBCPP_INLINE_VISIBILITY basic_string& operator=(const value_type* __s) {return assign(__s);} basic_string& operator=(value_type __c); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY iterator begin() _NOEXCEPT {return iterator(this, __get_pointer());} @@ -916,7 +916,7 @@ public: _LIBCPP_INLINE_VISIBILITY const_iterator end() const _NOEXCEPT {return const_iterator(__get_pointer() + size());} -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reverse_iterator rbegin() _NOEXCEPT {return reverse_iterator(end());} @@ -1422,14 +1422,14 @@ public: bool __is_long() const _NOEXCEPT {return bool(__r_.first().__s.__size_ & __short_mask);} -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const; bool __decrementable(const const_iterator* __i) const; bool __addable(const const_iterator* __i, ptrdiff_t __n) const; bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const; -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 private: _LIBCPP_INLINE_VISIBILITY @@ -1726,21 +1726,21 @@ inline void basic_string<_CharT, _Traits, _Allocator>::__invalidate_all_iterators() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif } template inline void basic_string<_CharT, _Traits, _Allocator>::__invalidate_iterators_past(size_type -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __pos #endif ) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); if (__c) { @@ -1758,7 +1758,7 @@ basic_string<_CharT, _Traits, _Allocator>::__invalidate_iterators_past(size_type } __get_db()->unlock(); } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 } template @@ -1767,7 +1767,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string() _NOEXCEPT_(is_nothrow_default_constructible::value) : __r_(__default_init_tag(), __default_init_tag()) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __zero(); @@ -1783,7 +1783,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const allocator_type& __ #endif : __r_(__default_init_tag(), __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __zero(); @@ -1845,7 +1845,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, const { _LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*, allocator) detected nullptr"); __init(__s, traits_type::length(__s)); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1857,7 +1857,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, size_ { _LIBCPP_ASSERT(__n == 0 || __s != nullptr, "basic_string(const char*, n) detected nullptr"); __init(__s, __n); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1869,7 +1869,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, size_ { _LIBCPP_ASSERT(__n == 0 || __s != nullptr, "basic_string(const char*, n, allocator) detected nullptr"); __init(__s, __n); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1884,7 +1884,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st __init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1899,7 +1899,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string( else __init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1936,7 +1936,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str) : __r_(_VSTD::move(__str.__r_)) { __str.__zero(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); if (__is_long()) __get_db()->swap(this, &__str); @@ -1955,7 +1955,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str, co __r_.first().__r = __str.__r_.first().__r; __str.__zero(); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); if (__is_long()) __get_db()->swap(this, &__str); @@ -1994,7 +1994,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __ : __r_(__default_init_tag(), __default_init_tag()) { __init(__n, __c); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2005,7 +2005,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __ : __r_(__default_init_tag(), __a) { __init(__n, __c); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2020,7 +2020,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st if (__pos > __str_sz) this->__throw_out_of_range(); __init(__str.data() + __pos, _VSTD::min(__n, __str_sz - __pos)); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2035,7 +2035,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st if (__pos > __str_sz) this->__throw_out_of_range(); __init(__str.data() + __pos, __str_sz - __pos); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2049,7 +2049,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string( __self_view __sv0 = __t; __self_view __sv = __sv0.substr(__pos, __n); __init(__sv.data(), __sv.size()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2061,7 +2061,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t) { __self_view __sv = __t; __init(__sv.data(), __sv.size()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2073,7 +2073,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t, const _ { __self_view __sv = __t; __init(__sv.data(), __sv.size()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2141,7 +2141,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(_InputIterator __first, : __r_(__default_init_tag(), __default_init_tag()) { __init(__first, __last); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2154,7 +2154,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(_InputIterator __first, : __r_(__default_init_tag(), __a) { __init(__first, __last); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2168,7 +2168,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string( : __r_(__default_init_tag(), __default_init_tag()) { __init(__il.begin(), __il.end()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2181,7 +2181,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string( : __r_(__default_init_tag(), __a) { __init(__il.begin(), __il.end()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2191,7 +2191,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string( template basic_string<_CharT, _Traits, _Allocator>::~basic_string() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__erase_c(this); #endif if (__is_long()) @@ -2768,7 +2768,7 @@ _EnableIf > basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _InputIterator __first, _InputIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, "string::insert(iterator, range) called with an iterator not" " referring to this string"); @@ -2787,7 +2787,7 @@ _EnableIf > basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _ForwardIterator __first, _ForwardIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, "string::insert(iterator, range) called with an iterator not" " referring to this string"); @@ -2903,7 +2903,7 @@ inline typename basic_string<_CharT, _Traits, _Allocator>::iterator basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, size_type __n, value_type __c) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, "string::insert(iterator, n, value) called with an iterator not" " referring to this string"); @@ -3137,7 +3137,7 @@ inline typename basic_string<_CharT, _Traits, _Allocator>::iterator basic_string<_CharT, _Traits, _Allocator>::erase(const_iterator __pos) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, "string::erase(iterator) called with an iterator not" " referring to this string"); @@ -3155,7 +3155,7 @@ inline typename basic_string<_CharT, _Traits, _Allocator>::iterator basic_string<_CharT, _Traits, _Allocator>::erase(const_iterator __first, const_iterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, "string::erase(iterator, iterator) called with an iterator not" " referring to this string"); @@ -3426,7 +3426,7 @@ basic_string<_CharT, _Traits, _Allocator>::swap(basic_string& __str) __is_nothrow_swappable::value) #endif { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 if (!__is_long()) __get_db()->__invalidate_all(this); if (!__str.__is_long()) @@ -4425,7 +4425,7 @@ inline _LIBCPP_INLINE_VISIBILITY } #endif -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 template bool @@ -4459,7 +4459,7 @@ basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* return this->data() <= __p && __p < this->data() + this->size(); } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 #if _LIBCPP_STD_VER > 11 // Literal suffixes for basic_string [basic.string.literals] diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index f130cfffdc9cc..fcdb15b32b187 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -906,7 +906,7 @@ public: unordered_map() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1025,7 +1025,7 @@ public: {return __table_.__insert_unique(__x);} iterator insert(const_iterator __p, const value_type& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not" " referring to this unordered_map"); @@ -1049,7 +1049,7 @@ public: {return __table_.__insert_unique(_VSTD::move(__x));} iterator insert(const_iterator __p, value_type&& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not" " referring to this unordered_map"); @@ -1070,7 +1070,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __p, _Pp&& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::insert(const_iterator, value_type&&) called with an iterator not" " referring to this unordered_map"); @@ -1089,7 +1089,7 @@ public: template _LIBCPP_INLINE_VISIBILITY iterator emplace_hint(const_iterator __p, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered_map"); @@ -1124,7 +1124,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not" " referring to this unordered_map"); @@ -1138,7 +1138,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not" " referring to this unordered_map"); @@ -1336,7 +1336,7 @@ public: _LIBCPP_INLINE_VISIBILITY void reserve(size_type __n) {__table_.reserve(__n);} -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const {return __table_.__dereferenceable(&__i->__i_);} @@ -1347,7 +1347,7 @@ public: bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const {return __table_.__addable(&__i->__i_, __n);} -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 private: @@ -1428,7 +1428,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1440,7 +1440,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const allocator_type& __a) : __table_(__hf, __eql, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1452,7 +1452,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const allocator_type& __a) : __table_(typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1462,7 +1462,7 @@ template unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( _InputIterator __first, _InputIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__first, __last); @@ -1475,7 +1475,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1489,7 +1489,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1501,7 +1501,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const unordered_map& __u) : __table_(__u.__table_) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -1513,7 +1513,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const unordered_map& __u, const allocator_type& __a) : __table_(__u.__table_, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -1529,7 +1529,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( _NOEXCEPT_(is_nothrow_move_constructible<__table>::value) : __table_(_VSTD::move(__u.__table_)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); __get_db()->swap(this, &__u); #endif @@ -1540,7 +1540,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( unordered_map&& __u, const allocator_type& __a) : __table_(_VSTD::move(__u.__table_), typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__a != __u.get_allocator()) @@ -1551,7 +1551,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( __u.__table_.remove((__i++).__i_)->__value_.__move()); } } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 else __get_db()->swap(this, &__u); #endif @@ -1561,7 +1561,7 @@ template unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( initializer_list __il) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__il.begin(), __il.end()); @@ -1573,7 +1573,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1586,7 +1586,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1807,7 +1807,7 @@ public: unordered_multimap() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2108,7 +2108,7 @@ public: _LIBCPP_INLINE_VISIBILITY void reserve(size_type __n) {__table_.reserve(__n);} -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const {return __table_.__dereferenceable(&__i->__i_);} @@ -2119,7 +2119,7 @@ public: bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const {return __table_.__addable(&__i->__i_, __n);} -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 }; @@ -2196,7 +2196,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -2208,7 +2208,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const allocator_type& __a) : __table_(__hf, __eql, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -2219,7 +2219,7 @@ template unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( _InputIterator __first, _InputIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__first, __last); @@ -2232,7 +2232,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -2246,7 +2246,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -2259,7 +2259,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const allocator_type& __a) : __table_(typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -2269,7 +2269,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const unordered_multimap& __u) : __table_(__u.__table_) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -2281,7 +2281,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const unordered_multimap& __u, const allocator_type& __a) : __table_(__u.__table_, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -2297,7 +2297,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( _NOEXCEPT_(is_nothrow_move_constructible<__table>::value) : __table_(_VSTD::move(__u.__table_)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); __get_db()->swap(this, &__u); #endif @@ -2308,7 +2308,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( unordered_multimap&& __u, const allocator_type& __a) : __table_(_VSTD::move(__u.__table_), typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__a != __u.get_allocator()) @@ -2320,7 +2320,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( __u.__table_.remove((__i++).__i_)->__value_.__move()); } } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 else __get_db()->swap(this, &__u); #endif @@ -2330,7 +2330,7 @@ template unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( initializer_list __il) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__il.begin(), __il.end()); @@ -2342,7 +2342,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -2355,7 +2355,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, typename __table::allocator_type(__a)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 6c4ad938006f9..da4beb176d2a9 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -425,7 +425,7 @@ public: unordered_set() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -539,7 +539,7 @@ public: {return __table_.__emplace_unique(_VSTD::forward<_Args>(__args)...);} template _LIBCPP_INLINE_VISIBILITY -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 iterator emplace_hint(const_iterator __p, _Args&&... __args) { _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, @@ -556,7 +556,7 @@ public: pair insert(value_type&& __x) {return __table_.__insert_unique(_VSTD::move(__x));} _LIBCPP_INLINE_VISIBILITY -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 iterator insert(const_iterator __p, value_type&& __x) { _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, @@ -577,7 +577,7 @@ public: {return __table_.__insert_unique(__x);} _LIBCPP_INLINE_VISIBILITY -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 iterator insert(const_iterator __p, const value_type& __x) { _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, @@ -726,7 +726,7 @@ public: _LIBCPP_INLINE_VISIBILITY void reserve(size_type __n) {__table_.reserve(__n);} -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const {return __table_.__dereferenceable(__i);} @@ -737,7 +737,7 @@ public: bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const {return __table_.__addable(__i, __n);} -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 }; @@ -802,7 +802,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -813,7 +813,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(size_type __n, const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -824,7 +824,7 @@ template unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( _InputIterator __first, _InputIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__first, __last); @@ -837,7 +837,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -851,7 +851,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -864,7 +864,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const allocator_type& __a) : __table_(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -874,7 +874,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const unordered_set& __u) : __table_(__u.__table_) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -886,7 +886,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const unordered_set& __u, const allocator_type& __a) : __table_(__u.__table_, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -902,7 +902,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( _NOEXCEPT_(is_nothrow_move_constructible<__table>::value) : __table_(_VSTD::move(__u.__table_)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); __get_db()->swap(this, &__u); #endif @@ -913,7 +913,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( unordered_set&& __u, const allocator_type& __a) : __table_(_VSTD::move(__u.__table_), __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__a != __u.get_allocator()) @@ -922,7 +922,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( while (__u.size() != 0) __table_.__insert_unique(_VSTD::move(__u.__table_.remove(__i++)->__value_)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 else __get_db()->swap(this, &__u); #endif @@ -932,7 +932,7 @@ template unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( initializer_list __il) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__il.begin(), __il.end()); @@ -944,7 +944,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -957,7 +957,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1091,7 +1091,7 @@ public: unordered_multiset() _NOEXCEPT_(is_nothrow_default_constructible<__table>::value) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1361,7 +1361,7 @@ public: _LIBCPP_INLINE_VISIBILITY void reserve(size_type __n) {__table_.reserve(__n);} -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const {return __table_.__dereferenceable(__i);} @@ -1372,7 +1372,7 @@ public: bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const {return __table_.__addable(__i, __n);} -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 }; @@ -1435,7 +1435,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( size_type __n, const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1447,7 +1447,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const allocator_type& __a) : __table_(__hf, __eql, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1458,7 +1458,7 @@ template unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( _InputIterator __first, _InputIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__first, __last); @@ -1471,7 +1471,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const hasher& __hf, const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1485,7 +1485,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1498,7 +1498,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const allocator_type& __a) : __table_(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -1508,7 +1508,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const unordered_multiset& __u) : __table_(__u.__table_) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -1520,7 +1520,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const unordered_multiset& __u, const allocator_type& __a) : __table_(__u.__table_, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__u.bucket_count()); @@ -1536,7 +1536,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( _NOEXCEPT_(is_nothrow_move_constructible<__table>::value) : __table_(_VSTD::move(__u.__table_)) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); __get_db()->swap(this, &__u); #endif @@ -1547,7 +1547,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( unordered_multiset&& __u, const allocator_type& __a) : __table_(_VSTD::move(__u.__table_), __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__a != __u.get_allocator()) @@ -1556,7 +1556,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( while (__u.size() != 0) __table_.__insert_multi(_VSTD::move(__u.__table_.remove(__i++)->__value_)); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 else __get_db()->swap(this, &__u); #endif @@ -1566,7 +1566,7 @@ template unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( initializer_list __il) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif insert(__il.begin(), __il.end()); @@ -1578,7 +1578,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const key_equal& __eql) : __table_(__hf, __eql) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); @@ -1591,7 +1591,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( const key_equal& __eql, const allocator_type& __a) : __table_(__hf, __eql, __a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif __table_.rehash(__n); diff --git a/libcxx/include/vector b/libcxx/include/vector index 1007beeaafd03..977207f7a302b 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -496,7 +496,7 @@ public: _LIBCPP_INLINE_VISIBILITY vector() _NOEXCEPT_(is_nothrow_default_constructible::value) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -508,7 +508,7 @@ public: #endif : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif } @@ -551,7 +551,7 @@ public: ~vector() { __annotate_delete(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__erase_c(this); #endif } @@ -789,14 +789,14 @@ public: bool __invariants() const; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const; bool __decrementable(const const_iterator* __i) const; bool __addable(const const_iterator* __i, ptrdiff_t __n) const; bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const; -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 private: _LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators(); @@ -1121,7 +1121,7 @@ vector<_Tp, _Allocator>::__append(size_type __n, const_reference __x) template vector<_Tp, _Allocator>::vector(size_type __n) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__n > 0) @@ -1136,7 +1136,7 @@ template vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__n > 0) @@ -1150,7 +1150,7 @@ vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a) template vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__n > 0) @@ -1164,7 +1164,7 @@ template vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__n > 0) @@ -1184,7 +1184,7 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first, typename iterator_traits<_InputIterator>::reference>::value, _InputIterator>::type __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __first != __last; ++__first) @@ -1201,7 +1201,7 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first, _InputIterator __last, c typename iterator_traits<_InputIterator>::reference>::value>::type*) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif for (; __first != __last; ++__first) @@ -1217,7 +1217,7 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, typename iterator_traits<_ForwardIterator>::reference>::value, _ForwardIterator>::type __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif size_type __n = static_cast(_VSTD::distance(__first, __last)); @@ -1237,7 +1237,7 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __las typename iterator_traits<_ForwardIterator>::reference>::value>::type*) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif size_type __n = static_cast(_VSTD::distance(__first, __last)); @@ -1252,7 +1252,7 @@ template vector<_Tp, _Allocator>::vector(const vector& __x) : __base(__alloc_traits::select_on_container_copy_construction(__x.__alloc())) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif size_type __n = __x.size(); @@ -1267,7 +1267,7 @@ template vector<_Tp, _Allocator>::vector(const vector& __x, const allocator_type& __a) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif size_type __n = __x.size(); @@ -1290,7 +1290,7 @@ vector<_Tp, _Allocator>::vector(vector&& __x) #endif : __base(_VSTD::move(__x.__alloc())) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); __get_db()->swap(this, &__x); #endif @@ -1305,7 +1305,7 @@ inline _LIBCPP_INLINE_VISIBILITY vector<_Tp, _Allocator>::vector(vector&& __x, const allocator_type& __a) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__a == __x.__alloc()) @@ -1314,7 +1314,7 @@ vector<_Tp, _Allocator>::vector(vector&& __x, const allocator_type& __a) this->__end_ = __x.__end_; this->__end_cap() = __x.__end_cap(); __x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__x); #endif } @@ -1329,7 +1329,7 @@ template inline _LIBCPP_INLINE_VISIBILITY vector<_Tp, _Allocator>::vector(initializer_list __il) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__il.size() > 0) @@ -1344,7 +1344,7 @@ inline _LIBCPP_INLINE_VISIBILITY vector<_Tp, _Allocator>::vector(initializer_list __il, const allocator_type& __a) : __base(__a) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__insert_c(this); #endif if (__il.size() > 0) @@ -1390,7 +1390,7 @@ vector<_Tp, _Allocator>::__move_assign(vector& __c, true_type) this->__end_ = __c.__end_; this->__end_cap() = __c.__end_cap(); __c.__begin_ = __c.__end_ = __c.__end_cap() = nullptr; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__c); #endif } @@ -1493,7 +1493,7 @@ inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::__make_iter(pointer __p) _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(this, __p); #else return iterator(__p); @@ -1505,7 +1505,7 @@ inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::const_iterator vector<_Tp, _Allocator>::__make_iter(const_pointer __p) const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(this, __p); #else return const_iterator(__p); @@ -1709,7 +1709,7 @@ inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::erase(const_iterator __position) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::erase(iterator) called with an iterator not" " referring to this vector"); @@ -1728,7 +1728,7 @@ template typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::erase(const_iterator __first, const_iterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, "vector::erase(iterator, iterator) called with an iterator not" " referring to this vector"); @@ -1769,7 +1769,7 @@ template typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::insert(iterator, x) called with an iterator not" " referring to this vector"); @@ -1806,7 +1806,7 @@ template typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::insert(iterator, x) called with an iterator not" " referring to this vector"); @@ -1839,7 +1839,7 @@ template typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::emplace(iterator, x) called with an iterator not" " referring to this vector"); @@ -1874,7 +1874,7 @@ template typename vector<_Tp, _Allocator>::iterator vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_reference __x) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::insert(iterator, n, x) called with an iterator not" " referring to this vector"); @@ -1925,7 +1925,7 @@ typename enable_if >::type vector<_Tp, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::insert(iterator, range) called with an iterator not" " referring to this vector"); @@ -1978,7 +1978,7 @@ typename enable_if >::type vector<_Tp, _Allocator>::insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this, "vector::insert(iterator, range) called with an iterator not" " referring to this vector"); @@ -2059,9 +2059,9 @@ vector<_Tp, _Allocator>::swap(vector& __x) _VSTD::swap(this->__end_cap(), __x.__end_cap()); __swap_allocator(this->__alloc(), __x.__alloc(), integral_constant()); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__x); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif } template @@ -2085,7 +2085,7 @@ vector<_Tp, _Allocator>::__invariants() const return true; } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 template bool @@ -2117,16 +2117,16 @@ vector<_Tp, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __ return this->__begin_ <= __p && __p < this->__end_; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 template inline _LIBCPP_INLINE_VISIBILITY void vector<_Tp, _Allocator>::__invalidate_all_iterators() { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif } @@ -2134,7 +2134,7 @@ template inline _LIBCPP_INLINE_VISIBILITY void vector<_Tp, _Allocator>::__invalidate_iterators_past(pointer __new_last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __p = __c->end_; __p != __c->beg_; ) { --__p; From 870827f65222ebb1a9701d4d0e916f315dc692a8 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 2 Oct 2020 15:07:40 -0400 Subject: [PATCH 414/544] [libc++] NFCI: Remove the _LIBCPP_DEBUG_MODE helper macro It was used inconsistently and the name was pretty confusing, so we might as well use `#if _LIBCPP_DEBUG_LEVEL == 2` consistently everywhere. --- libcxx/include/__debug | 3 --- libcxx/include/__hash_table | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/libcxx/include/__debug b/libcxx/include/__debug index dbf47f6f53073..1829b3279d5a5 100644 --- a/libcxx/include/__debug +++ b/libcxx/include/__debug @@ -29,15 +29,12 @@ #if _LIBCPP_DEBUG_LEVEL == 0 # define _LIBCPP_DEBUG_ASSERT(x, m) ((void)0) -# define _LIBCPP_DEBUG_MODE(...) ((void)0) # define _LIBCPP_ASSERT_IMPL(x, m) ((void)0) #elif _LIBCPP_DEBUG_LEVEL == 1 # define _LIBCPP_DEBUG_ASSERT(x, m) ((void)0) -# define _LIBCPP_DEBUG_MODE(...) ((void)0) # define _LIBCPP_ASSERT_IMPL(x, m) ((x) ? (void)0 : _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) #elif _LIBCPP_DEBUG_LEVEL == 2 # define _LIBCPP_DEBUG_ASSERT(x, m) _LIBCPP_ASSERT(x, m) -# define _LIBCPP_DEBUG_MODE(...) __VA_ARGS__ # define _LIBCPP_ASSERT_IMPL(x, m) ((x) ? (void)0 : _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) #else # error _LIBCPP_DEBUG_LEVEL must be one of 0, 1, 2 diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 8836391b2ab7c..1feb4bc3582f6 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -295,7 +295,9 @@ public: typedef typename _NodeTypes::__node_value_type_pointer pointer; _LIBCPP_INLINE_VISIBILITY __hash_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } #if _LIBCPP_DEBUG_LEVEL == 2 @@ -405,14 +407,18 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } _LIBCPP_INLINE_VISIBILITY __hash_const_iterator(const __non_const_iterator& __x) _NOEXCEPT : __node_(__x.__node_) { - _LIBCPP_DEBUG_MODE(__get_db()->__iterator_copy(this, &__x)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__iterator_copy(this, &__x); +#endif } #if _LIBCPP_DEBUG_LEVEL == 2 @@ -518,7 +524,9 @@ public: typedef typename _NodeTypes::__node_value_type_pointer pointer; _LIBCPP_INLINE_VISIBILITY __hash_local_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } #if _LIBCPP_DEBUG_LEVEL == 2 @@ -650,7 +658,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } _LIBCPP_INLINE_VISIBILITY @@ -659,7 +669,9 @@ public: __bucket_(__x.__bucket_), __bucket_count_(__x.__bucket_count_) { - _LIBCPP_DEBUG_MODE(__get_db()->__iterator_copy(this, &__x)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__iterator_copy(this, &__x); +#endif } #if _LIBCPP_DEBUG_LEVEL == 2 From 0c8f9b8099fd0500cd885bc699924e20371014ff Mon Sep 17 00:00:00 2001 From: ergawy Date: Fri, 2 Oct 2020 14:56:17 -0400 Subject: [PATCH 415/544] [MLIR][SPIRV] Add initial support for OpSpecConstantComposite. This commit adds support to SPIR-V's composite specialization constants. These are specialization constants which are composed of other spec constants (whehter scalar or composite), regular constatns, or undef values. This commit adds support for parsing, printing, verification, and (De)serialization. A few TODOs are still in order: - Supporting more types of constituents; currently, only scalar spec constatns are supported. - Extending `spv._reference_of` to support composite spec constatns. Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D88568 --- .../mlir/Dialect/SPIRV/SPIRVStructureOps.td | 54 +++++++- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 90 +++++++++++++ .../SPIRV/Serialization/Deserializer.cpp | 37 +++++ .../SPIRV/Serialization/Serializer.cpp | 42 ++++++ .../SPIRV/Serialization/spec-constant.mlir | 22 ++- mlir/test/Dialect/SPIRV/structure-ops.mlir | 127 ++++++++++++++++++ 6 files changed, 369 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td index 2ac28ef87ba98..0e866f02b011b 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td @@ -491,6 +491,8 @@ def SPV_ReferenceOfOp : SPV_Op<"_reference_of", [NoSideEffect]> { ```mlir %0 = spv._reference_of @spec_const : f32 ``` + + TODO Add support for composite specialization constants. }]; let arguments = (ins @@ -541,8 +543,6 @@ def SPV_SpecConstantOp : SPV_Op<"specConstant", [InModuleScope, Symbol]> { spv.specConstant @spec_const1 = true spv.specConstant @spec_const2 spec_id(5) = 42 : i32 ``` - - TODO: support composite spec constants with another op }]; let arguments = (ins @@ -557,6 +557,56 @@ def SPV_SpecConstantOp : SPV_Op<"specConstant", [InModuleScope, Symbol]> { let autogenSerialization = 0; } +def SPV_SpecConstantCompositeOp : SPV_Op<"specConstantComposite", [InModuleScope, Symbol]> { + let summary = "Declare a new composite specialization constant."; + + let description = [{ + This op declares a SPIR-V composite specialization constant. This covers + the `OpSpecConstantComposite` SPIR-V instruction. Scalar constants are + covered by `spv.specConstant`. + + A constituent of a spec constant composite can be: + - A symbol referring of another spec constant. + - The SSA ID of a non-specialization constant (i.e. defined through + `spv.specConstant`). + - The SSA ID of a `spv.undef`. + + ``` + spv-spec-constant-composite-op ::= `spv.specConstantComposite` symbol-ref-id ` (` + symbol-ref-id (`, ` symbol-ref-id)* + `) :` composite-type + ``` + + where `composite-type` is some non-scalar type that can be represented in the `spv` + dialect: `spv.struct`, `spv.array`, or `vector`. + + #### Example: + + ```mlir + spv.specConstant @sc1 = 1 : i32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.struct + ``` + + TODO Add support for constituents that are: + - regular constants. + - undef. + - spec constant composite. + }]; + + let arguments = (ins + TypeAttr:$type, + StrAttr:$sym_name, + SymbolRefArrayAttr:$constituents + ); + + let results = (outs); + + let hasOpcode = 0; + + let autogenSerialization = 0; +} // ----- #endif // SPIRV_STRUCTURE_OPS diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index a01177132b27b..363785e2b7822 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -53,6 +53,7 @@ static constexpr const char kTypeAttrName[] = "type"; static constexpr const char kUnequalSemanticsAttrName[] = "unequal_semantics"; static constexpr const char kValueAttrName[] = "value"; static constexpr const char kValuesAttrName[] = "values"; +static constexpr const char kCompositeSpecConstituentsName[] = "constituents"; //===----------------------------------------------------------------------===// // Common utility functions @@ -3287,6 +3288,95 @@ static LogicalResult verifyMatrixTimesMatrix(spirv::MatrixTimesMatrixOp op) { return success(); } +//===----------------------------------------------------------------------===// +// spv.specConstantComposite +//===----------------------------------------------------------------------===// + +static ParseResult parseSpecConstantCompositeOp(OpAsmParser &parser, + OperationState &state) { + + StringAttr compositeName; + if (parser.parseSymbolName(compositeName, SymbolTable::getSymbolAttrName(), + state.attributes)) + return failure(); + + if (parser.parseLParen()) + return failure(); + + SmallVector constituents; + + do { + // The name of the constituent attribute isn't important + const char *attrName = "spec_const"; + FlatSymbolRefAttr specConstRef; + NamedAttrList attrs; + + if (parser.parseAttribute(specConstRef, Type(), attrName, attrs)) + return failure(); + + constituents.push_back(specConstRef); + } while (!parser.parseOptionalComma()); + + if (parser.parseRParen()) + return failure(); + + state.addAttribute(kCompositeSpecConstituentsName, + parser.getBuilder().getArrayAttr(constituents)); + + Type type; + if (parser.parseColonType(type)) + return failure(); + + state.addAttribute(kTypeAttrName, TypeAttr::get(type)); + + return success(); +} + +static void print(spirv::SpecConstantCompositeOp op, OpAsmPrinter &printer) { + printer << spirv::SpecConstantCompositeOp::getOperationName() << " "; + printer.printSymbolName(op.sym_name()); + printer << " ("; + auto constituents = op.constituents().getValue(); + + if (!constituents.empty()) + llvm::interleaveComma(constituents, printer); + + printer << ") : " << op.type(); +} + +static LogicalResult verify(spirv::SpecConstantCompositeOp constOp) { + auto cType = constOp.type().dyn_cast(); + auto constituents = constOp.constituents().getValue(); + + if (!cType) + return constOp.emitError( + "result type must be a composite type, but provided ") + << constOp.type(); + + if (cType.isa()) + return constOp.emitError("unsupported composite type ") << cType; + else if (constituents.size() != cType.getNumElements()) + return constOp.emitError("has incorrect number of operands: expected ") + << cType.getNumElements() << ", but provided " + << constituents.size(); + + for (auto index : llvm::seq(0, constituents.size())) { + auto constituent = constituents[index].dyn_cast(); + + auto constituentSpecConstOp = + dyn_cast(SymbolTable::lookupNearestSymbolFrom( + constOp.getParentOp(), constituent.getValue())); + + if (constituentSpecConstOp.default_value().getType() != + cType.getElementType(index)) + return constOp.emitError("has incorrect types of operands: expected ") + << cType.getElementType(index) << ", but provided " + << constituentSpecConstOp.default_value().getType(); + } + + return success(); +} + namespace mlir { namespace spirv { diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp index b5eea43338243..153540ddb2811 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp @@ -249,6 +249,8 @@ class Deserializer { /// `operands`. LogicalResult processConstantComposite(ArrayRef operands); + LogicalResult processSpecConstantComposite(ArrayRef operands); + /// Processes a SPIR-V OpConstantNull instruction with the given `operands`. LogicalResult processConstantNull(ArrayRef operands); @@ -1546,6 +1548,39 @@ Deserializer::processConstantComposite(ArrayRef operands) { return success(); } +LogicalResult +Deserializer::processSpecConstantComposite(ArrayRef operands) { + if (operands.size() < 2) { + return emitError(unknownLoc, + "OpConstantComposite must have type and result "); + } + if (operands.size() < 3) { + return emitError(unknownLoc, + "OpConstantComposite must have at least 1 parameter"); + } + + Type resultType = getType(operands[0]); + if (!resultType) { + return emitError(unknownLoc, "undefined result type from ") + << operands[0]; + } + + auto symName = opBuilder.getStringAttr(getSpecConstantSymbol(operands[1])); + + SmallVector elements; + elements.reserve(operands.size() - 2); + for (unsigned i = 2, e = operands.size(); i < e; ++i) { + auto elementInfo = getSpecConstant(operands[i]); + elements.push_back(opBuilder.getSymbolRefAttr(elementInfo)); + } + + opBuilder.create( + unknownLoc, TypeAttr::get(resultType), symName, + opBuilder.getArrayAttr(elements)); + + return success(); +} + LogicalResult Deserializer::processConstantNull(ArrayRef operands) { if (operands.size() != 2) { return emitError(unknownLoc, @@ -2276,6 +2311,8 @@ LogicalResult Deserializer::processInstruction(spirv::Opcode opcode, return processConstant(operands, /*isSpec=*/true); case spirv::Opcode::OpConstantComposite: return processConstantComposite(operands); + case spirv::Opcode::OpSpecConstantComposite: + return processSpecConstantComposite(operands); case spirv::Opcode::OpConstantTrue: return processConstantBool(/*isTrue=*/true, operands, /*isSpec=*/false); case spirv::Opcode::OpSpecConstantTrue: diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp index 1eda166a03256..426c838a7e5dd 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp @@ -200,6 +200,9 @@ class Serializer { LogicalResult processSpecConstantOp(spirv::SpecConstantOp op); + LogicalResult + processSpecConstantCompositeOp(spirv::SpecConstantCompositeOp op); + /// SPIR-V dialect supports OpUndef using spv.UndefOp that produces a SSA /// value to use with other operations. The SPIR-V spec recommends that /// OpUndef be generated at module level. The serialization generates an @@ -645,6 +648,42 @@ LogicalResult Serializer::processSpecConstantOp(spirv::SpecConstantOp op) { return failure(); } +LogicalResult +Serializer::processSpecConstantCompositeOp(spirv::SpecConstantCompositeOp op) { + uint32_t typeID = 0; + if (failed(processType(op.getLoc(), op.type(), typeID))) { + return failure(); + } + + auto resultID = getNextID(); + + SmallVector operands; + operands.push_back(typeID); + operands.push_back(resultID); + + auto constituents = op.constituents(); + + for (auto index : llvm::seq(0, constituents.size())) { + auto constituent = constituents[index].dyn_cast(); + + auto constituentName = constituent.getValue(); + auto constituentID = getSpecConstID(constituentName); + + if (!constituentID) { + return op.emitError("unknown result for specialization constant ") + << constituentName; + } + + operands.push_back(constituentID); + } + + encodeInstructionInto(typesGlobalValues, + spirv::Opcode::OpSpecConstantComposite, operands); + specConstIDMap[op.sym_name()] = resultID; + + return processName(resultID, op.sym_name()); +} + LogicalResult Serializer::processUndefOp(spirv::UndefOp op) { auto undefType = op.getType(); auto &id = undefValIDMap[undefType]; @@ -1765,6 +1804,9 @@ LogicalResult Serializer::processOperation(Operation *opInst) { .Case([&](spirv::ReferenceOfOp op) { return processReferenceOfOp(op); }) .Case([&](spirv::SelectionOp op) { return processSelectionOp(op); }) .Case([&](spirv::SpecConstantOp op) { return processSpecConstantOp(op); }) + .Case([&](spirv::SpecConstantCompositeOp op) { + return processSpecConstantCompositeOp(op); + }) .Case([&](spirv::UndefOp op) { return processUndefOp(op); }) .Case([&](spirv::VariableOp op) { return processVariableOp(op); }) diff --git a/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir b/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir index 03cc85b8c087e..0df930162c746 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/spec-constant.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-translate -test-spirv-roundtrip %s | FileCheck %s +// RUN: mlir-translate -test-spirv-roundtrip -split-input-file %s | FileCheck %s spv.module Logical GLSL450 requires #spv.vce { // CHECK: spv.specConstant @sc_true = true @@ -25,3 +25,23 @@ spv.module Logical GLSL450 requires #spv.vce { spv.ReturnValue %1 : i32 } } + +// ----- + +spv.module Logical GLSL450 requires #spv.vce { + + spv.specConstant @sc_f32_1 = 1.5 : f32 + spv.specConstant @sc_f32_2 = 2.5 : f32 + spv.specConstant @sc_f32_3 = 3.5 : f32 + + spv.specConstant @sc_i32_1 = 1 : i32 + + // CHECK: spv.specConstantComposite @scc_array (@sc_f32_1, @sc_f32_2, @sc_f32_3) : !spv.array<3 x f32> + spv.specConstantComposite @scc_array (@sc_f32_1, @sc_f32_2, @sc_f32_3) : !spv.array<3 x f32> + + // CHECK: spv.specConstantComposite @scc_struct (@sc_i32_1, @sc_f32_2, @sc_f32_3) : !spv.struct + spv.specConstantComposite @scc_struct (@sc_i32_1, @sc_f32_2, @sc_f32_3) : !spv.struct + + // CHECK: spv.specConstantComposite @scc_vector (@sc_f32_1, @sc_f32_2, @sc_f32_3) : vector<3xf32> + spv.specConstantComposite @scc_vector (@sc_f32_1, @sc_f32_2, @sc_f32_3) : vector<3 x f32> +} diff --git a/mlir/test/Dialect/SPIRV/structure-ops.mlir b/mlir/test/Dialect/SPIRV/structure-ops.mlir index 98da480b83ff1..765eba959a26b 100644 --- a/mlir/test/Dialect/SPIRV/structure-ops.mlir +++ b/mlir/test/Dialect/SPIRV/structure-ops.mlir @@ -596,3 +596,130 @@ func @use_in_function() -> () { spv.specConstant @sc = false return } + +// ----- + +//===----------------------------------------------------------------------===// +// spv.specConstantComposite +//===----------------------------------------------------------------------===// + +spv.module Logical GLSL450 { + // expected-error @+1 {{result type must be a composite type}} + spv.specConstantComposite @scc2 (@sc1, @sc2, @sc3) : i32 +} + +//===----------------------------------------------------------------------===// +// spv.specConstantComposite (spv.array) +//===----------------------------------------------------------------------===// + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1.5 : f32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // CHECK: spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.array<3 x f32> + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.array<3 x f32> +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = false + spv.specConstant @sc2 spec_id(5) = 42 : i64 + spv.specConstant @sc3 = 1.5 : f32 + // expected-error @+1 {{has incorrect number of operands: expected 4, but provided 3}} + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.array<4 x f32> + +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1 : i32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // expected-error @+1 {{has incorrect types of operands: expected 'f32', but provided 'i32'}} + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.array<3 x f32> +} + +//===----------------------------------------------------------------------===// +// spv.specConstantComposite (spv.struct) +//===----------------------------------------------------------------------===// + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1 : i32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // CHECK: spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.struct + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.struct +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1 : i32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // expected-error @+1 {{has incorrect number of operands: expected 2, but provided 3}} + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.struct +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1.5 : f32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // expected-error @+1 {{has incorrect types of operands: expected 'i32', but provided 'f32'}} + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : !spv.struct +} + +//===----------------------------------------------------------------------===// +// spv.specConstantComposite (vector) +//===----------------------------------------------------------------------===// + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1.5 : f32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // CHECK: spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : vector<3xf32> + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : vector<3 x f32> +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = false + spv.specConstant @sc2 spec_id(5) = 42 : i64 + spv.specConstant @sc3 = 1.5 : f32 + // expected-error @+1 {{has incorrect number of operands: expected 4, but provided 3}} + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : vector<4xf32> + +} + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1 : i32 + spv.specConstant @sc2 = 2.5 : f32 + spv.specConstant @sc3 = 3.5 : f32 + // expected-error @+1 {{has incorrect types of operands: expected 'f32', but provided 'i32'}} + spv.specConstantComposite @scc (@sc1, @sc2, @sc3) : vector<3xf32> +} + +//===----------------------------------------------------------------------===// +// spv.specConstantComposite (spv.coopmatrix) +//===----------------------------------------------------------------------===// + +// ----- + +spv.module Logical GLSL450 { + spv.specConstant @sc1 = 1.5 : f32 + // expected-error @+1 {{unsupported composite type}} + spv.specConstantComposite @scc (@sc1) : !spv.coopmatrix<8x16xf32, Device> +} From 24f406e6d39bfd43eb1ce7fbee525256fa8adc4c Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Fri, 2 Oct 2020 12:13:51 -0700 Subject: [PATCH 416/544] Relax newly added opcode alias check to check only for a number instead of a specific opcode. --- .../CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 63892e4f2ab1f..8d51a246b3596 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -606,11 +606,11 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to 208 +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_MEMSET (opcode {{[0-9]+}}): 3 type indices, 1 imm index -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to 208 +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected From 87b63c1726e24214272cd6c04825c585635d1cdc Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 2 Oct 2020 21:47:03 +0200 Subject: [PATCH 417/544] [MemCpyOpt] Avoid double invalidation (NFCI) The removal of the cpy instruction is left to the caller of performCallSlotOptzn(), including the invalidation of MD. Both call-sites already do this. Also handle incrementation of NumMemCpyInstr consistently at the call-site. One of the call-site was already doing this, which ended up incrementing the statistic twice. This fix was part of D26739. --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 01f3c322b1f49..4d30804f16804 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -953,10 +953,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, LLVMContext::MD_access_group}; combineMetadata(C, cpy, KnownIDs, true); - // Remove the memcpy. - MD->removeInstruction(cpy); - ++NumMemCpyInstr; - return true; } @@ -1272,6 +1268,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { MD->removeInstruction(M); M->eraseFromParent(); + ++NumMemCpyInstr; return true; } } From 94704ed008f78e71aa42a452d8b03c122e0f78cd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 2 Oct 2020 20:42:22 +0200 Subject: [PATCH 418/544] [MemCpyOpt] Add helper to erase instructions (NFC) Next to erasing the instruction, we also always want to remove it from MSSA and MD. Use a common function to do so. This is a refactoring split out from D26739. --- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 1 + .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 73 ++++++------------- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 89a2e24af288b..ea6f37192d5eb 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -70,6 +70,7 @@ class MemCpyOptPass : public PassInfoMixin { Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); + void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); }; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 4d30804f16804..b8c0d20d03218 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -302,6 +302,13 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) +void MemCpyOptPass::eraseInstruction(Instruction *I) { + if (MSSAU) + MSSAU->removeMemoryAccess(I); + MD->removeInstruction(I); + I->eraseFromParent(); +} + /// When scanning forward over instructions, we look for some other patterns to /// fold away. In particular, this looks for stores to neighboring locations of /// memory. If it sees enough consecutive ones, it attempts to merge them @@ -442,12 +449,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, } // Zap all the stores. - for (Instruction *SI : Range.TheStores) { - if (MSSAU) - MSSAU->removeMemoryAccess(SI); - MD->removeInstruction(SI); - SI->eraseFromParent(); - } + for (Instruction *SI : Range.TheStores) + eraseInstruction(SI); ++NumMemSetInfer; } @@ -633,14 +636,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - MSSAU->removeMemoryAccess(SI); - MSSAU->removeMemoryAccess(LI); } - MD->removeInstruction(SI); - SI->eraseFromParent(); - MD->removeInstruction(LI); - LI->eraseFromParent(); + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; // Make sure we do not invalidate the iterator. @@ -685,15 +684,8 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { DL.getTypeStoreSize(SI->getOperand(0)->getType()), commonAlignment(SI->getAlign(), LI->getAlign()), C); if (changed) { - if (MSSAU) { - MSSAU->removeMemoryAccess(SI); - MSSAU->removeMemoryAccess(LI); - } - - MD->removeInstruction(SI); - SI->eraseFromParent(); - MD->removeInstruction(LI); - LI->eraseFromParent(); + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; return true; } @@ -733,11 +725,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - MSSAU->removeMemoryAccess(SI); } - MD->removeInstruction(SI); - SI->eraseFromParent(); + eraseInstruction(SI); NumMemSetInfer++; // Make sure we do not invalidate the iterator. @@ -1028,12 +1018,10 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, auto *LastDef = cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - MSSAU->removeMemoryAccess(M); } // Remove the instruction we're replacing. - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1111,11 +1099,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, auto *NewAccess = MSSAU->createMemoryAccessBefore( NewMemSet, LastDef->getDefiningAccess(), LastDef); MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - MSSAU->removeMemoryAccess(MemSet); } - MD->removeInstruction(MemSet); - MemSet->eraseFromParent(); + eraseInstruction(MemSet); return true; } @@ -1203,11 +1189,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // If the source and destination of the memcpy are the same, then zap it. if (M->getSource() == M->getDest()) { ++BBI; - if (MSSAU) - MSSAU->removeMemoryAccess(M); - - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); return true; } @@ -1226,11 +1208,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - MSSAU->removeMemoryAccess(M); } - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumCpyToSet; return true; } @@ -1263,11 +1243,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { M->getSourceAlign().valueOrOne()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), CopySize->getZExtValue(), Alignment, C)) { - if (MSSAU) - MSSAU->removeMemoryAccess(M); - - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1283,11 +1259,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { return processMemCpyMemCpyDependence(M, MDep); } else if (SrcDepInfo.isDef()) { if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { - if (MSSAU) - MSSAU->removeMemoryAccess(M); - - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1296,10 +1268,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (SrcDepInfo.isClobber()) if (MemSetInst *MDep = dyn_cast(SrcDepInfo.getInst())) if (performMemCpyToMemSetOptzn(M, MDep)) { - if (MSSAU) - MSSAU->removeMemoryAccess(M); - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumCpyToSet; return true; } From 128e999d63c41e54d5d73c8af47e1ce401e6a200 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 2 Oct 2020 12:56:38 -0700 Subject: [PATCH 419/544] [lldb] Add a "design" section to the documentation. Create a "Design" section for the LLDB documentation. The goal is to have design documents that describe how the LLDB internals work. Currently similar pages are mixed together under the "Development". The existing pages describing the architecture, the reproducers, the structured data plugins, and the SB API could be housed here. I hope we'd see more pages being added here in the future. Differential revision: https://reviews.llvm.org/D88516 --- lldb/docs/.htaccess | 6 +++++- .../architecture.rst => design/overview.rst} | 4 ++-- lldb/docs/{resources => design}/reproducers.rst | 0 lldb/docs/{resources => design}/sbapi.rst | 4 ++-- .../structureddataplugins.md | 0 lldb/docs/index.rst | 15 +++++++++++---- 6 files changed, 20 insertions(+), 9 deletions(-) rename lldb/docs/{resources/architecture.rst => design/overview.rst} (99%) rename lldb/docs/{resources => design}/reproducers.rst (100%) rename lldb/docs/{resources => design}/sbapi.rst (99%) rename lldb/docs/{resources => design}/structureddataplugins.md (100%) diff --git a/lldb/docs/.htaccess b/lldb/docs/.htaccess index 596f4481ab494..31b80359fb5fe 100644 --- a/lldb/docs/.htaccess +++ b/lldb/docs/.htaccess @@ -1,3 +1,4 @@ +# Old website redirects Redirect 301 /architecture/index.html https://lldb.llvm.org/resources/architecture.html Redirect 301 /cpp_reference/html/index.html https://lldb.llvm.org/cpp_reference/index.html Redirect 301 /features.html https://lldb.llvm.org/status/features.html @@ -10,7 +11,10 @@ Redirect 301 /source.html https://lldb.llvm.org/resources/contributing.html Redirect 301 /tutorial.html https://lldb.llvm.org/use/tutorial.html Redirect 301 /varformats.html https://lldb.llvm.org/use/variable.html -# Sphinx redirects +# Current website redirects Redirect 301 /resources/source.html https://lldb.llvm.org/resources/contributing.html Redirect 301 /resources/download.html https://lldb.llvm.org/status/releases.html Redirect 301 /use/architecture.html https://lldb.llvm.org/resources/architecture.html +Redirect 301 /resources/architecture.html https://lldb.llvm.org/design/overview.html +Redirect 301 /resources/reproducers.html https://lldb.llvm.org/design/reproducers.html +Redirect 301 /resources/sbapi.html https://lldb.llvm.org/design/sbapi.html diff --git a/lldb/docs/resources/architecture.rst b/lldb/docs/design/overview.rst similarity index 99% rename from lldb/docs/resources/architecture.rst rename to lldb/docs/design/overview.rst index e87d1248d4011..72eac56d6c3e3 100644 --- a/lldb/docs/resources/architecture.rst +++ b/lldb/docs/design/overview.rst @@ -1,5 +1,5 @@ -Architecture -============ +Overview +======== LLDB is a large and complex codebase. This section will help you become more familiar with the pieces that make up LLDB and give a general overview of the diff --git a/lldb/docs/resources/reproducers.rst b/lldb/docs/design/reproducers.rst similarity index 100% rename from lldb/docs/resources/reproducers.rst rename to lldb/docs/design/reproducers.rst diff --git a/lldb/docs/resources/sbapi.rst b/lldb/docs/design/sbapi.rst similarity index 99% rename from lldb/docs/resources/sbapi.rst rename to lldb/docs/design/sbapi.rst index 048f6c12d9720..674fd680b907a 100644 --- a/lldb/docs/resources/sbapi.rst +++ b/lldb/docs/design/sbapi.rst @@ -1,5 +1,5 @@ -The SB API Coding Rules -======================= +Scripting Bridge API +==================== The SB APIs constitute the stable C++ API that lldb presents to external clients, and which get processed by SWIG to produce the Python bindings to diff --git a/lldb/docs/resources/structureddataplugins.md b/lldb/docs/design/structureddataplugins.md similarity index 100% rename from lldb/docs/resources/structureddataplugins.md rename to lldb/docs/design/structureddataplugins.md diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst index 77c3dbbf6f741..909089f3cebe4 100644 --- a/lldb/docs/index.rst +++ b/lldb/docs/index.rst @@ -140,16 +140,23 @@ interesting areas to contribute to lldb. :maxdepth: 1 :caption: Development - resources/architecture resources/contributing resources/build resources/test resources/bots - resources/reproducers - resources/structureddataplugins - resources/sbapi resources/caveats + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Design + + design/overview + design/reproducers + design/structureddataplugins + design/sbapi + .. toctree:: :hidden: :maxdepth: 1 From 11622d0fed8c1fb99124ebf4a6aece4bcc83b367 Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Fri, 2 Oct 2020 13:08:49 -0700 Subject: [PATCH 420/544] [flang][NFC] Fix mis-matched struct/class declarations The template `ListDirectedStatementState` was declared as a struct and then as a class. Fix it so they match. Differential Revision: https://reviews.llvm.org/D88711 --- flang/runtime/io-stmt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index b5d3caff04f00..686cc0f4cb0ab 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -149,10 +149,11 @@ struct IoStatementBase : public DefaultFormatControlCallbacks { }; // Common state for list-directed internal & external I/O -template struct ListDirectedStatementState; +template class ListDirectedStatementState; template <> -struct ListDirectedStatementState +class ListDirectedStatementState : public FormattedIoStatementState { +public: static std::size_t RemainingSpaceInRecord(const ConnectionState &); bool NeedAdvance(const ConnectionState &, std::size_t) const; bool EmitLeadingSpaceOrAdvance( From 045a620c455d2f27a536d687ee6a0299b9e2c734 Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Thu, 1 Oct 2020 18:05:34 +0000 Subject: [PATCH 421/544] Release the shadow memory used by the mmap range at munmap When an application does a lot of pairs of mmap and munmap, if we did not release shadoe memory used by mmap addresses, this would increase memory usage. Reviewed-by: morehouse Differential Revision: https://reviews.llvm.org/D88686 --- compiler-rt/lib/dfsan/dfsan_interceptors.cpp | 13 +++++ .../test/dfsan/munmap_release_shadow.c | 47 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 compiler-rt/test/dfsan/munmap_release_shadow.c diff --git a/compiler-rt/lib/dfsan/dfsan_interceptors.cpp b/compiler-rt/lib/dfsan/dfsan_interceptors.cpp index 12b74df2bd4e0..5ab7c2b4828ca 100644 --- a/compiler-rt/lib/dfsan/dfsan_interceptors.cpp +++ b/compiler-rt/lib/dfsan/dfsan_interceptors.cpp @@ -46,12 +46,25 @@ INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags, return res; } +INTERCEPTOR(int, munmap, void *addr, SIZE_T length) { + int res = REAL(munmap)(addr, length); + if (res != -1) { + uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); + void *end_addr = + (void *)((uptr)addr + RoundUpTo(length, GetPageSizeCached())); + uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr); + ReleaseMemoryPagesToOS(beg_shadow_addr, end_shadow_addr); + } + return res; +} + namespace __dfsan { void InitializeInterceptors() { CHECK(!interceptors_initialized); INTERCEPT_FUNCTION(mmap); INTERCEPT_FUNCTION(mmap64); + INTERCEPT_FUNCTION(munmap); interceptors_initialized = true; } diff --git a/compiler-rt/test/dfsan/munmap_release_shadow.c b/compiler-rt/test/dfsan/munmap_release_shadow.c new file mode 100644 index 0000000000000..085844dfa6927 --- /dev/null +++ b/compiler-rt/test/dfsan/munmap_release_shadow.c @@ -0,0 +1,47 @@ +// RUN: %clang_dfsan %s -o %t && %run %t + +#include +#include +#include +#include +#include +#include +#include + +size_t get_rss_kb() { + long rss = 0L; + FILE *f = NULL; + assert((f = fopen("/proc/self/statm", "r"))); + assert(fscanf(f, "%*s%ld", &rss) == 1); + fclose(f); + return ((size_t)rss * (size_t)sysconf(_SC_PAGESIZE)) >> 10; +} + +int main(int argc, char **argv) { + const size_t map_size = 100 << 20; + size_t before = get_rss_kb(); + + char *p = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + const dfsan_label label = dfsan_create_label("l", 0); + char val = 0xff; + dfsan_set_label(label, &val, sizeof(val)); + memset(p, val, map_size); + size_t after_mmap = get_rss_kb(); + + munmap(p, map_size); + size_t after_munmap = get_rss_kb(); + + fprintf(stderr, "RSS at start: %td, after mmap: %td, after mumap: %td\n", + before, after_mmap, after_munmap); + + // The memory after mmap increases 3 times of map_size because the overhead of + // shadow memory is 2x. + const size_t mmap_cost_kb = 3 * (map_size >> 10); + assert(after_mmap >= before + mmap_cost_kb); + // OS does not release memory to the same level as the start of the program. + // The assert checks the memory after munmap up to a delta. + const size_t delta = 5000; + assert(after_munmap + mmap_cost_kb <= after_mmap + delta); + return 0; +} From 0373c768c56fc7ce6fe1efa48383f7b376e7bc2a Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Sat, 19 Sep 2020 18:53:44 +0300 Subject: [PATCH 422/544] [llvm-objcopy][NFC] refactor error handling. part 3. Remove usages of special error reporting functions(error(), reportError()). Errors are reported as Expected<>/Error returning values. This part is for ELF subfolder of llvm-objcopy. Testing: check-all. Differential Revision: https://reviews.llvm.org/D87987 --- llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp | 1 - llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 104 ++- llvm/tools/llvm-objcopy/ELF/Object.cpp | 844 ++++++++++++------ llvm/tools/llvm-objcopy/ELF/Object.h | 242 ++--- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 1 - llvm/tools/llvm-objcopy/MachO/Object.cpp | 1 - llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 31 - llvm/tools/llvm-objcopy/llvm-objcopy.h | 42 - llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp | 1 - 9 files changed, 756 insertions(+), 511 deletions(-) delete mode 100644 llvm/tools/llvm-objcopy/llvm-objcopy.h diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp index 94325c5f63a27..b5de8a45a80fb 100644 --- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp @@ -12,7 +12,6 @@ #include "Object.h" #include "Reader.h" #include "Writer.h" -#include "llvm-objcopy.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index 66953f9ef0d56..5a34153647ccd 100644 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -10,7 +10,6 @@ #include "Buffer.h" #include "CopyConfig.h" #include "Object.h" -#include "llvm-objcopy.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" @@ -266,19 +265,23 @@ static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink, static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader, StringRef File, ElfType OutputElfType) { - auto DWOFile = Reader.create(false); + Expected> DWOFile = Reader.create(false); + if (!DWOFile) + return DWOFile.takeError(); + auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) { - return onlyKeepDWOPred(*DWOFile, Sec); + return onlyKeepDWOPred(**DWOFile, Sec); }; - if (Error E = DWOFile->removeSections(Config.AllowBrokenLinks, - OnlyKeepDWOPred)) + if (Error E = + (*DWOFile)->removeSections(Config.AllowBrokenLinks, OnlyKeepDWOPred)) return E; if (Config.OutputArch) { - DWOFile->Machine = Config.OutputArch.getValue().EMachine; - DWOFile->OSABI = Config.OutputArch.getValue().OSABI; + (*DWOFile)->Machine = Config.OutputArch.getValue().EMachine; + (*DWOFile)->OSABI = Config.OutputArch.getValue().OSABI; } FileBuffer FB(File); - auto Writer = createWriter(Config, *DWOFile, FB, OutputElfType); + std::unique_ptr Writer = + createWriter(Config, **DWOFile, FB, OutputElfType); if (Error E = Writer->finalize()) return E; return Writer->write(); @@ -313,12 +316,12 @@ static bool isCompressable(const SectionBase &Sec) { StringRef(Sec.Name).startswith(".debug"); } -static void replaceDebugSections( +static Error replaceDebugSections( Object &Obj, SectionPred &RemovePred, function_ref shouldReplace, - function_ref addSection) { + function_ref(const SectionBase *)> addSection) { // Build a list of the debug sections we are going to replace. - // We can't call `addSection` while iterating over sections, + // We can't call `AddSection` while iterating over sections, // because it would mutate the sections array. SmallVector ToReplace; for (auto &Sec : Obj.sections()) @@ -327,8 +330,13 @@ static void replaceDebugSections( // Build a mapping from original section to a new one. DenseMap FromTo; - for (SectionBase *S : ToReplace) - FromTo[S] = addSection(S); + for (SectionBase *S : ToReplace) { + Expected NewSection = addSection(S); + if (!NewSection) + return NewSection.takeError(); + + FromTo[S] = *NewSection; + } // Now we want to update the target sections of relocation // sections. Also we will update the relocations themselves @@ -339,6 +347,8 @@ static void replaceDebugSections( RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) { return shouldReplace(Sec) || RemovePred(Sec); }; + + return Error::success(); } static bool isUnneededSymbol(const Symbol &Sym) { @@ -577,20 +587,28 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { }; } - if (Config.CompressionType != DebugCompressionType::None) - replaceDebugSections(Obj, RemovePred, isCompressable, - [&Config, &Obj](const SectionBase *S) { - return &Obj.addSection( - *S, Config.CompressionType); - }); - else if (Config.DecompressDebugSections) - replaceDebugSections( - Obj, RemovePred, - [](const SectionBase &S) { return isa(&S); }, - [&Obj](const SectionBase *S) { - auto CS = cast(S); - return &Obj.addSection(*CS); - }); + if (Config.CompressionType != DebugCompressionType::None) { + if (Error Err = replaceDebugSections( + Obj, RemovePred, isCompressable, + [&Config, &Obj](const SectionBase *S) -> Expected { + Expected NewSection = + CompressedSection::create(*S, Config.CompressionType); + if (!NewSection) + return NewSection.takeError(); + + return &Obj.addSection(std::move(*NewSection)); + })) + return Err; + } else if (Config.DecompressDebugSections) { + if (Error Err = replaceDebugSections( + Obj, RemovePred, + [](const SectionBase &S) { return isa(&S); }, + [&Obj](const SectionBase *S) { + const CompressedSection *CS = cast(S); + return &Obj.addSection(*CS); + })) + return Err; + } return Obj.removeSections(Config.AllowBrokenLinks, RemovePred); } @@ -740,9 +758,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, // If the symbol table was previously removed, we need to create a new one // before adding new symbols. - if (!Obj.SymbolTable && !Config.ELF->SymbolsToAdd.empty()) { - Obj.addNewSymbolTable(); - } + if (!Obj.SymbolTable && !Config.ELF->SymbolsToAdd.empty()) + if (Error E = Obj.addNewSymbolTable()) + return E; for (const NewSymbolInfo &SI : Config.ELF->SymbolsToAdd) { SectionBase *Sec = Obj.findSection(SI.SectionName); @@ -769,12 +787,15 @@ static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out, Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { IHexReader Reader(&In); - std::unique_ptr Obj = Reader.create(true); + Expected> Obj = Reader.create(true); + if (!Obj) + return Obj.takeError(); + const ElfType OutputElfType = getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); - if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) + if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType)) return E; - return writeOutput(Config, *Obj, Out, OutputElfType); + return writeOutput(Config, **Obj, Out, OutputElfType); } Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, @@ -782,21 +803,26 @@ Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, uint8_t NewSymbolVisibility = Config.ELF->NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT); BinaryReader Reader(&In, NewSymbolVisibility); - std::unique_ptr Obj = Reader.create(true); + Expected> Obj = Reader.create(true); + if (!Obj) + return Obj.takeError(); // Prefer OutputArch (-O) if set, otherwise fallback to BinaryArch // (-B). const ElfType OutputElfType = getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); - if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) + if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType)) return E; - return writeOutput(Config, *Obj, Out, OutputElfType); + return writeOutput(Config, **Obj, Out, OutputElfType); } Error executeObjcopyOnBinary(const CopyConfig &Config, object::ELFObjectFileBase &In, Buffer &Out) { ELFReader Reader(&In, Config.ExtractPartition); - std::unique_ptr Obj = Reader.create(!Config.SymbolsToAdd.empty()); + Expected> Obj = + Reader.create(!Config.SymbolsToAdd.empty()); + if (!Obj) + return Obj.takeError(); // Prefer OutputArch (-O) if set, otherwise infer it from the input. const ElfType OutputElfType = Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue()) @@ -822,10 +848,10 @@ Error executeObjcopyOnBinary(const CopyConfig &Config, Config.BuildIdLinkInput.getValue(), BuildIdBytes)) return E; - if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) + if (Error E = handleArgs(Config, **Obj, Reader, OutputElfType)) return createFileError(Config.InputFilename, std::move(E)); - if (Error E = writeOutput(Config, *Obj, Out, OutputElfType)) + if (Error E = writeOutput(Config, **Obj, Out, OutputElfType)) return createFileError(Config.InputFilename, std::move(E)); if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput) if (Error E = diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp index e19285ee97eac..5eadb5c683bd9 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "Object.h" -#include "llvm-objcopy.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -15,6 +14,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCTargetOptions.h" +#include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" @@ -60,7 +60,7 @@ Error SectionBase::removeSymbols(function_ref ToRemove) { return Error::success(); } -void SectionBase::initialize(SectionTableRef SecTable) {} +Error SectionBase::initialize(SectionTableRef) { return Error::success(); } void SectionBase::finalize() {} void SectionBase::markSymbols() {} void SectionBase::replaceSectionReferences( @@ -82,72 +82,98 @@ template void ELFWriter::writeShdr(const SectionBase &Sec) { Shdr.sh_entsize = Sec.EntrySize; } -template void ELFSectionSizer::visit(Section &Sec) {} +template Error ELFSectionSizer::visit(Section &) { + return Error::success(); +} -template -void ELFSectionSizer::visit(OwnedDataSection &Sec) {} +template Error ELFSectionSizer::visit(OwnedDataSection &) { + return Error::success(); +} -template -void ELFSectionSizer::visit(StringTableSection &Sec) {} +template Error ELFSectionSizer::visit(StringTableSection &) { + return Error::success(); +} template -void ELFSectionSizer::visit(DynamicRelocationSection &Sec) {} +Error ELFSectionSizer::visit(DynamicRelocationSection &) { + return Error::success(); +} template -void ELFSectionSizer::visit(SymbolTableSection &Sec) { +Error ELFSectionSizer::visit(SymbolTableSection &Sec) { Sec.EntrySize = sizeof(Elf_Sym); Sec.Size = Sec.Symbols.size() * Sec.EntrySize; // Align to the largest field in Elf_Sym. Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word); + return Error::success(); } template -void ELFSectionSizer::visit(RelocationSection &Sec) { +Error ELFSectionSizer::visit(RelocationSection &Sec) { Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela); Sec.Size = Sec.Relocations.size() * Sec.EntrySize; // Align to the largest field in Elf_Rel(a). Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word); + return Error::success(); } template -void ELFSectionSizer::visit(GnuDebugLinkSection &Sec) {} +Error ELFSectionSizer::visit(GnuDebugLinkSection &) { + return Error::success(); +} -template void ELFSectionSizer::visit(GroupSection &Sec) { +template Error ELFSectionSizer::visit(GroupSection &Sec) { Sec.Size = sizeof(Elf_Word) + Sec.GroupMembers.size() * sizeof(Elf_Word); + return Error::success(); } template -void ELFSectionSizer::visit(SectionIndexSection &Sec) {} +Error ELFSectionSizer::visit(SectionIndexSection &) { + return Error::success(); +} -template -void ELFSectionSizer::visit(CompressedSection &Sec) {} +template Error ELFSectionSizer::visit(CompressedSection &) { + return Error::success(); +} template -void ELFSectionSizer::visit(DecompressedSection &Sec) {} +Error ELFSectionSizer::visit(DecompressedSection &) { + return Error::success(); +} -void BinarySectionWriter::visit(const SectionIndexSection &Sec) { - error("cannot write symbol section index table '" + Sec.Name + "' "); +Error BinarySectionWriter::visit(const SectionIndexSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write symbol section index table '" + + Sec.Name + "' "); } -void BinarySectionWriter::visit(const SymbolTableSection &Sec) { - error("cannot write symbol table '" + Sec.Name + "' out to binary"); +Error BinarySectionWriter::visit(const SymbolTableSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write symbol table '" + Sec.Name + + "' out to binary"); } -void BinarySectionWriter::visit(const RelocationSection &Sec) { - error("cannot write relocation section '" + Sec.Name + "' out to binary"); +Error BinarySectionWriter::visit(const RelocationSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write relocation section '" + Sec.Name + + "' out to binary"); } -void BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) { - error("cannot write '" + Sec.Name + "' out to binary"); +Error BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write '" + Sec.Name + "' out to binary"); } -void BinarySectionWriter::visit(const GroupSection &Sec) { - error("cannot write '" + Sec.Name + "' out to binary"); +Error BinarySectionWriter::visit(const GroupSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write '" + Sec.Name + "' out to binary"); } -void SectionWriter::visit(const Section &Sec) { +Error SectionWriter::visit(const Section &Sec) { if (Sec.Type != SHT_NOBITS) llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset); + + return Error::success(); } static bool addressOverflows32bit(uint64_t Addr) { @@ -352,30 +378,34 @@ uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) { return Base; } -void IHexSectionWriterBase::writeData(uint8_t Type, uint16_t Addr, +void IHexSectionWriterBase::writeData(uint8_t, uint16_t, ArrayRef Data) { Offset += IHexRecord::getLineLength(Data.size()); } -void IHexSectionWriterBase::visit(const Section &Sec) { +Error IHexSectionWriterBase::visit(const Section &Sec) { writeSection(&Sec, Sec.Contents); + return Error::success(); } -void IHexSectionWriterBase::visit(const OwnedDataSection &Sec) { +Error IHexSectionWriterBase::visit(const OwnedDataSection &Sec) { writeSection(&Sec, Sec.Data); + return Error::success(); } -void IHexSectionWriterBase::visit(const StringTableSection &Sec) { +Error IHexSectionWriterBase::visit(const StringTableSection &Sec) { // Check that sizer has already done its work assert(Sec.Size == Sec.StrTabBuilder.getSize()); // We are free to pass an invalid pointer to writeSection as long // as we don't actually write any data. The real writer class has // to override this method . writeSection(&Sec, {nullptr, static_cast(Sec.Size)}); + return Error::success(); } -void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) { +Error IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) { writeSection(&Sec, Sec.Contents); + return Error::success(); } void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr, @@ -385,19 +415,25 @@ void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr, Offset += HexData.size(); } -void IHexSectionWriter::visit(const StringTableSection &Sec) { +Error IHexSectionWriter::visit(const StringTableSection &Sec) { assert(Sec.Size == Sec.StrTabBuilder.getSize()); std::vector Data(Sec.Size); Sec.StrTabBuilder.write(Data.data()); writeSection(&Sec, Data); + return Error::success(); } -void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); } +Error Section::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); +} -void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); } +Error Section::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); +} -void SectionWriter::visit(const OwnedDataSection &Sec) { +Error SectionWriter::visit(const OwnedDataSection &Sec) { llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset); + return Error::success(); } static constexpr std::array ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}}; @@ -424,7 +460,7 @@ getDecompressedSizeAndAlignment(ArrayRef Data) { } template -void ELFSectionWriter::visit(const DecompressedSection &Sec) { +Error ELFSectionWriter::visit(const DecompressedSection &Sec) { const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData) ? (ZlibGnuMagic.size() + sizeof(Sec.Size)) : sizeof(Elf_Chdr_Impl); @@ -434,32 +470,37 @@ void ELFSectionWriter::visit(const DecompressedSection &Sec) { Sec.OriginalData.size() - DataOffset); SmallVector DecompressedContent; - if (Error E = zlib::uncompress(CompressedContent, DecompressedContent, - static_cast(Sec.Size))) - reportError(Sec.Name, std::move(E)); + if (Error Err = zlib::uncompress(CompressedContent, DecompressedContent, + static_cast(Sec.Size))) + return createStringError(errc::invalid_argument, + "'" + Sec.Name + "': " + toString(std::move(Err))); uint8_t *Buf = Out.getBufferStart() + Sec.Offset; std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf); + + return Error::success(); } -void BinarySectionWriter::visit(const DecompressedSection &Sec) { - error("cannot write compressed section '" + Sec.Name + "' "); +Error BinarySectionWriter::visit(const DecompressedSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write compressed section '" + Sec.Name + + "' "); } -void DecompressedSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error DecompressedSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void DecompressedSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error DecompressedSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } -void OwnedDataSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error OwnedDataSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void OwnedDataSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error OwnedDataSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } void OwnedDataSection::appendHexData(StringRef HexData) { @@ -471,16 +512,18 @@ void OwnedDataSection::appendHexData(StringRef HexData) { Size = Data.size(); } -void BinarySectionWriter::visit(const CompressedSection &Sec) { - error("cannot write compressed section '" + Sec.Name + "' "); +Error BinarySectionWriter::visit(const CompressedSection &Sec) { + return createStringError(errc::operation_not_permitted, + "cannot write compressed section '" + Sec.Name + + "' "); } template -void ELFSectionWriter::visit(const CompressedSection &Sec) { +Error ELFSectionWriter::visit(const CompressedSection &Sec) { uint8_t *Buf = Out.getBufferStart() + Sec.Offset; if (Sec.CompressionType == DebugCompressionType::None) { std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf); - return; + return Error::success(); } if (Sec.CompressionType == DebugCompressionType::GNU) { @@ -501,17 +544,42 @@ void ELFSectionWriter::visit(const CompressedSection &Sec) { } std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf); + return Error::success(); +} + +Expected +CompressedSection::create(const SectionBase &Sec, + DebugCompressionType CompressionType) { + Error Err = Error::success(); + CompressedSection Section(Sec, CompressionType, Err); + + if (Err) + return std::move(Err); + + return Section; +} +Expected +CompressedSection::create(ArrayRef CompressedData, + uint64_t DecompressedSize, + uint64_t DecompressedAlign) { + return CompressedSection(CompressedData, DecompressedSize, DecompressedAlign); } CompressedSection::CompressedSection(const SectionBase &Sec, - DebugCompressionType CompressionType) + DebugCompressionType CompressionType, + Error &OutErr) : SectionBase(Sec), CompressionType(CompressionType), DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) { - if (Error E = zlib::compress( + ErrorAsOutParameter EAO(&OutErr); + + if (Error Err = zlib::compress( StringRef(reinterpret_cast(OriginalData.data()), OriginalData.size()), - CompressedData)) - reportError(Name, std::move(E)); + CompressedData)) { + OutErr = createStringError(llvm::errc::invalid_argument, + "'" + Name + "': " + toString(std::move(Err))); + return; + } size_t ChdrSize; if (CompressionType == DebugCompressionType::GNU) { @@ -537,12 +605,12 @@ CompressedSection::CompressedSection(ArrayRef CompressedData, OriginalData = CompressedData; } -void CompressedSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error CompressedSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void CompressedSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error CompressedSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); } @@ -556,42 +624,51 @@ void StringTableSection::prepareForLayout() { Size = StrTabBuilder.getSize(); } -void SectionWriter::visit(const StringTableSection &Sec) { +Error SectionWriter::visit(const StringTableSection &Sec) { Sec.StrTabBuilder.write(Out.getBufferStart() + Sec.Offset); + return Error::success(); } -void StringTableSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error StringTableSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void StringTableSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error StringTableSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } template -void ELFSectionWriter::visit(const SectionIndexSection &Sec) { +Error ELFSectionWriter::visit(const SectionIndexSection &Sec) { uint8_t *Buf = Out.getBufferStart() + Sec.Offset; llvm::copy(Sec.Indexes, reinterpret_cast(Buf)); + return Error::success(); } -void SectionIndexSection::initialize(SectionTableRef SecTable) { +Error SectionIndexSection::initialize(SectionTableRef SecTable) { Size = 0; - setSymTab(SecTable.getSectionOfType( - Link, - "Link field value " + Twine(Link) + " in section " + Name + " is invalid", - "Link field value " + Twine(Link) + " in section " + Name + - " is not a symbol table")); + Expected Sec = + SecTable.getSectionOfType( + Link, + "Link field value " + Twine(Link) + " in section " + Name + + " is invalid", + "Link field value " + Twine(Link) + " in section " + Name + + " is not a symbol table"); + if (!Sec) + return Sec.takeError(); + + setSymTab(*Sec); Symbols->setShndxTable(this); + return Error::success(); } void SectionIndexSection::finalize() { Link = Symbols->Index; } -void SectionIndexSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error SectionIndexSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void SectionIndexSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error SectionIndexSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { @@ -718,14 +795,20 @@ void SymbolTableSection::replaceSectionReferences( Sym->DefinedIn = To; } -void SymbolTableSection::initialize(SectionTableRef SecTable) { +Error SymbolTableSection::initialize(SectionTableRef SecTable) { Size = 0; - setStrTab(SecTable.getSectionOfType( - Link, - "Symbol table has link index of " + Twine(Link) + - " which is not a valid index", - "Symbol table has link index of " + Twine(Link) + - " which is not a string table")); + Expected Sec = + SecTable.getSectionOfType( + Link, + "Symbol table has link index of " + Twine(Link) + + " which is not a valid index", + "Symbol table has link index of " + Twine(Link) + + " which is not a string table"); + if (!Sec) + return Sec.takeError(); + + setStrTab(*Sec); + return Error::success(); } void SymbolTableSection::finalize() { @@ -770,19 +853,25 @@ void SymbolTableSection::fillShndxTable() { } } -const Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) const { +Expected +SymbolTableSection::getSymbolByIndex(uint32_t Index) const { if (Symbols.size() <= Index) - error("invalid symbol index: " + Twine(Index)); + return createStringError(errc::invalid_argument, + "invalid symbol index: " + Twine(Index)); return Symbols[Index].get(); } -Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) { - return const_cast( - static_cast(this)->getSymbolByIndex(Index)); +Expected SymbolTableSection::getSymbolByIndex(uint32_t Index) { + Expected Sym = + static_cast(this)->getSymbolByIndex(Index); + if (!Sym) + return Sym.takeError(); + + return const_cast(*Sym); } template -void ELFSectionWriter::visit(const SymbolTableSection &Sec) { +Error ELFSectionWriter::visit(const SymbolTableSection &Sec) { Elf_Sym *Sym = reinterpret_cast(Out.getBufferStart() + Sec.Offset); // Loop though symbols setting each entry of the symbol table. for (const std::unique_ptr &Symbol : Sec.Symbols) { @@ -795,14 +884,15 @@ void ELFSectionWriter::visit(const SymbolTableSection &Sec) { Sym->st_shndx = Symbol->getShndx(); ++Sym; } + return Error::success(); } -void SymbolTableSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error SymbolTableSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void SymbolTableSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } Error RelocationSection::removeSectionReferences( @@ -834,22 +924,33 @@ Error RelocationSection::removeSectionReferences( } template -void RelocSectionWithSymtabBase::initialize( +Error RelocSectionWithSymtabBase::initialize( SectionTableRef SecTable) { - if (Link != SHN_UNDEF) - setSymTab(SecTable.getSectionOfType( + if (Link != SHN_UNDEF) { + Expected Sec = SecTable.getSectionOfType( Link, "Link field value " + Twine(Link) + " in section " + Name + " is invalid", "Link field value " + Twine(Link) + " in section " + Name + - " is not a symbol table")); + " is not a symbol table"); + if (!Sec) + return Sec.takeError(); - if (Info != SHN_UNDEF) - setSection(SecTable.getSection(Info, "Info field value " + Twine(Info) + - " in section " + Name + - " is invalid")); - else + setSymTab(*Sec); + } + + if (Info != SHN_UNDEF) { + Expected Sec = + SecTable.getSection(Info, "Info field value " + Twine(Info) + + " in section " + Name + " is invalid"); + if (!Sec) + return Sec.takeError(); + + setSection(*Sec); + } else setSection(nullptr); + + return Error::success(); } template @@ -880,20 +981,21 @@ static void writeRel(const RelRange &Relocations, T *Buf) { } template -void ELFSectionWriter::visit(const RelocationSection &Sec) { +Error ELFSectionWriter::visit(const RelocationSection &Sec) { uint8_t *Buf = Out.getBufferStart() + Sec.Offset; if (Sec.Type == SHT_REL) writeRel(Sec.Relocations, reinterpret_cast(Buf)); else writeRel(Sec.Relocations, reinterpret_cast(Buf)); + return Error::success(); } -void RelocationSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error RelocationSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void RelocationSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error RelocationSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } Error RelocationSection::removeSymbols( @@ -920,16 +1022,17 @@ void RelocationSection::replaceSectionReferences( SecToApplyRel = To; } -void SectionWriter::visit(const DynamicRelocationSection &Sec) { +Error SectionWriter::visit(const DynamicRelocationSection &Sec) { llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset); + return Error::success(); } -void DynamicRelocationSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error DynamicRelocationSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } Error DynamicRelocationSection::removeSectionReferences( @@ -1015,14 +1118,22 @@ void GroupSection::onRemove() { Sec->Flags &= ~SHF_GROUP; } -void Section::initialize(SectionTableRef SecTable) { +Error Section::initialize(SectionTableRef SecTable) { if (Link == ELF::SHN_UNDEF) - return; - LinkSection = + return Error::success(); + + Expected Sec = SecTable.getSection(Link, "Link field value " + Twine(Link) + " in section " + Name + " is invalid"); + if (!Sec) + return Sec.takeError(); + + LinkSection = *Sec; + if (LinkSection->Type == ELF::SHT_SYMTAB) LinkSection = nullptr; + + return Error::success(); } void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; } @@ -1051,37 +1162,39 @@ GnuDebugLinkSection::GnuDebugLinkSection(StringRef File, } template -void ELFSectionWriter::visit(const GnuDebugLinkSection &Sec) { +Error ELFSectionWriter::visit(const GnuDebugLinkSection &Sec) { unsigned char *Buf = Out.getBufferStart() + Sec.Offset; Elf_Word *CRC = reinterpret_cast(Buf + Sec.Size - sizeof(Elf_Word)); *CRC = Sec.CRC32; llvm::copy(Sec.FileName, Buf); + return Error::success(); } -void GnuDebugLinkSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error GnuDebugLinkSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } template -void ELFSectionWriter::visit(const GroupSection &Sec) { +Error ELFSectionWriter::visit(const GroupSection &Sec) { ELF::Elf32_Word *Buf = reinterpret_cast(Out.getBufferStart() + Sec.Offset); *Buf++ = Sec.FlagWord; for (SectionBase *S : Sec.GroupMembers) support::endian::write32(Buf++, S->Index); + return Error::success(); } -void GroupSection::accept(SectionVisitor &Visitor) const { - Visitor.visit(*this); +Error GroupSection::accept(SectionVisitor &Visitor) const { + return Visitor.visit(*this); } -void GroupSection::accept(MutableSectionVisitor &Visitor) { - Visitor.visit(*this); +Error GroupSection::accept(MutableSectionVisitor &Visitor) { + return Visitor.visit(*this); } // Returns true IFF a section is wholly inside the range of a segment @@ -1161,9 +1274,12 @@ SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) { return &SymTab; } -void BasicELFBuilder::initSections() { +Error BasicELFBuilder::initSections() { for (SectionBase &Sec : Obj->sections()) - Sec.initialize(Obj->sections()); + if (Error Err = Sec.initialize(Obj->sections())) + return Err; + + return Error::success(); } void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { @@ -1190,12 +1306,13 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { 0); } -std::unique_ptr BinaryELFBuilder::build() { +Expected> BinaryELFBuilder::build() { initFileHeader(); initHeaderSegment(); SymbolTableSection *SymTab = addSymTab(addStrTab()); - initSections(); + if (Error Err = initSections()) + return std::move(Err); addData(SymTab); return std::move(Obj); @@ -1246,12 +1363,13 @@ void IHexELFBuilder::addDataSections() { } } -std::unique_ptr IHexELFBuilder::build() { +Expected> IHexELFBuilder::build() { initFileHeader(); initHeaderSegment(); StringTableSection *StrTab = addStrTab(); addSymTab(StrTab); - initSections(); + if (Error Err = initSections()) + return std::move(Err); addDataSections(); return std::move(Obj); @@ -1273,27 +1391,37 @@ template void ELFBuilder::setParentSegment(Segment &Child) { } } -template void ELFBuilder::findEhdrOffset() { +template Error ELFBuilder::findEhdrOffset() { if (!ExtractPartition) - return; + return Error::success(); for (const SectionBase &Sec : Obj.sections()) { if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) { EhdrOffset = Sec.Offset; - return; + return Error::success(); } } - error("could not find partition named '" + *ExtractPartition + "'"); + return createStringError(errc::invalid_argument, + "could not find partition named '" + + *ExtractPartition + "'"); } template -void ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { +Error ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { uint32_t Index = 0; - for (const auto &Phdr : unwrapOrError(HeadersFile.program_headers())) { + + Expected::Elf_Phdr_Range> Headers = + HeadersFile.program_headers(); + if (!Headers) + return Headers.takeError(); + + for (const typename ELFFile::Elf_Phdr &Phdr : *Headers) { if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize()) - error("program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) + - " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) + - " goes past the end of the file"); + return createStringError( + errc::invalid_argument, + "program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) + + " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) + + " goes past the end of the file"); ArrayRef Data{HeadersFile.base() + Phdr.p_offset, (size_t)Phdr.p_filesz}; @@ -1341,13 +1469,16 @@ void ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { setParentSegment(Child); setParentSegment(ElfHdr); setParentSegment(PrHdr); + + return Error::success(); } template -void ELFBuilder::initGroupSection(GroupSection *GroupSec) { +Error ELFBuilder::initGroupSection(GroupSection *GroupSec) { if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0) - error("invalid alignment " + Twine(GroupSec->Align) + " of group section '" + - GroupSec->Name + "'"); + return createStringError(errc::invalid_argument, + "invalid alignment " + Twine(GroupSec->Align) + + " of group section '" + GroupSec->Name + "'"); SectionTableRef SecTable = Obj.sections(); if (GroupSec->Link != SHN_UNDEF) { auto SymTab = SecTable.template getSectionOfType( @@ -1356,16 +1487,23 @@ void ELFBuilder::initGroupSection(GroupSection *GroupSec) { GroupSec->Name + "' is invalid", "link field value '" + Twine(GroupSec->Link) + "' in section '" + GroupSec->Name + "' is not a symbol table"); - Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info); + if (!SymTab) + return SymTab.takeError(); + + Expected Sym = (*SymTab)->getSymbolByIndex(GroupSec->Info); if (!Sym) - error("info field value '" + Twine(GroupSec->Info) + "' in section '" + - GroupSec->Name + "' is not a valid symbol index"); - GroupSec->setSymTab(SymTab); - GroupSec->setSymbol(Sym); + return createStringError(errc::invalid_argument, + "info field value '" + Twine(GroupSec->Info) + + "' in section '" + GroupSec->Name + + "' is not a valid symbol index"); + GroupSec->setSymTab(*SymTab); + GroupSec->setSymbol(*Sym); } if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) || GroupSec->Contents.empty()) - error("the content of the section " + GroupSec->Name + " is malformed"); + return createStringError(errc::invalid_argument, + "the content of the section " + GroupSec->Name + + " is malformed"); const ELF::Elf32_Word *Word = reinterpret_cast(GroupSec->Contents.data()); const ELF::Elf32_Word *End = @@ -1373,57 +1511,99 @@ void ELFBuilder::initGroupSection(GroupSection *GroupSec) { GroupSec->setFlagWord(*Word++); for (; Word != End; ++Word) { uint32_t Index = support::endian::read32(Word); - GroupSec->addMember(SecTable.getSection( + Expected Sec = SecTable.getSection( Index, "group member index " + Twine(Index) + " in section '" + - GroupSec->Name + "' is invalid")); + GroupSec->Name + "' is invalid"); + if (!Sec) + return Sec.takeError(); + + GroupSec->addMember(*Sec); } + + return Error::success(); } template -void ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { - const Elf_Shdr &Shdr = *unwrapOrError(ElfFile.getSection(SymTab->Index)); - StringRef StrTabData = unwrapOrError(ElfFile.getStringTableForSymtab(Shdr)); +Error ELFBuilder::initSymbolTable(SymbolTableSection *SymTab) { + Expected Shdr = ElfFile.getSection(SymTab->Index); + if (!Shdr) + return Shdr.takeError(); + + Expected StrTabData = ElfFile.getStringTableForSymtab(**Shdr); + if (!StrTabData) + return StrTabData.takeError(); + ArrayRef ShndxData; - auto Symbols = unwrapOrError(ElfFile.symbols(&Shdr)); - for (const auto &Sym : Symbols) { + Expected::Elf_Sym_Range> Symbols = + ElfFile.symbols(*Shdr); + if (!Symbols) + return Symbols.takeError(); + + for (const typename ELFFile::Elf_Sym &Sym : *Symbols) { SectionBase *DefSection = nullptr; - StringRef Name = unwrapOrError(Sym.getName(StrTabData)); + + Expected Name = Sym.getName(*StrTabData); + if (!Name) + return Name.takeError(); if (Sym.st_shndx == SHN_XINDEX) { if (SymTab->getShndxTable() == nullptr) - error("symbol '" + Name + - "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists"); + return createStringError(errc::invalid_argument, + "symbol '" + *Name + + "' has index SHN_XINDEX but no " + "SHT_SYMTAB_SHNDX section exists"); if (ShndxData.data() == nullptr) { - const Elf_Shdr &ShndxSec = - *unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index)); - ShndxData = unwrapOrError( - ElfFile.template getSectionContentsAsArray(ShndxSec)); - if (ShndxData.size() != Symbols.size()) - error("symbol section index table does not have the same number of " - "entries as the symbol table"); + Expected ShndxSec = + ElfFile.getSection(SymTab->getShndxTable()->Index); + if (!ShndxSec) + return ShndxSec.takeError(); + + Expected> Data = + ElfFile.template getSectionContentsAsArray(**ShndxSec); + if (!Data) + return Data.takeError(); + + ShndxData = *Data; + if (ShndxData.size() != Symbols->size()) + return createStringError( + errc::invalid_argument, + "symbol section index table does not have the same number of " + "entries as the symbol table"); } - Elf_Word Index = ShndxData[&Sym - Symbols.begin()]; - DefSection = Obj.sections().getSection( + Elf_Word Index = ShndxData[&Sym - Symbols->begin()]; + Expected Sec = Obj.sections().getSection( Index, - "symbol '" + Name + "' has invalid section index " + Twine(Index)); + "symbol '" + *Name + "' has invalid section index " + Twine(Index)); + if (!Sec) + return Sec.takeError(); + + DefSection = *Sec; } else if (Sym.st_shndx >= SHN_LORESERVE) { if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) { - error( - "symbol '" + Name + - "' has unsupported value greater than or equal to SHN_LORESERVE: " + - Twine(Sym.st_shndx)); + return createStringError( + errc::invalid_argument, + "symbol '" + *Name + + "' has unsupported value greater than or equal " + "to SHN_LORESERVE: " + + Twine(Sym.st_shndx)); } } else if (Sym.st_shndx != SHN_UNDEF) { - DefSection = Obj.sections().getSection( - Sym.st_shndx, "symbol '" + Name + + Expected Sec = Obj.sections().getSection( + Sym.st_shndx, "symbol '" + *Name + "' is defined has invalid section index " + Twine(Sym.st_shndx)); + if (!Sec) + return Sec.takeError(); + + DefSection = *Sec; } - SymTab->addSymbol(Name, Sym.getBinding(), Sym.getType(), DefSection, + SymTab->addSymbol(*Name, Sym.getBinding(), Sym.getType(), DefSection, Sym.getValue(), Sym.st_other, Sym.st_shndx, Sym.st_size); } + + return Error::success(); } template @@ -1435,8 +1615,8 @@ static void getAddend(uint64_t &ToSet, const Elf_Rel_Impl &Rela) { } template -static void initRelocations(RelocationSection *Relocs, - SymbolTableSection *SymbolTable, T RelRange) { +static Error initRelocations(RelocationSection *Relocs, + SymbolTableSection *SymbolTable, T RelRange) { for (const auto &Rel : RelRange) { Relocation ToAdd; ToAdd.Offset = Rel.r_offset; @@ -1445,39 +1625,54 @@ static void initRelocations(RelocationSection *Relocs, if (uint32_t Sym = Rel.getSymbol(false)) { if (!SymbolTable) - error("'" + Relocs->Name + - "': relocation references symbol with index " + Twine(Sym) + - ", but there is no symbol table"); - ToAdd.RelocSymbol = SymbolTable->getSymbolByIndex(Sym); + return createStringError( + errc::invalid_argument, + "'" + Relocs->Name + "': relocation references symbol with index " + + Twine(Sym) + ", but there is no symbol table"); + Expected SymByIndex = SymbolTable->getSymbolByIndex(Sym); + if (!SymByIndex) + return SymByIndex.takeError(); + + ToAdd.RelocSymbol = *SymByIndex; } Relocs->addRelocation(ToAdd); } + + return Error::success(); } -SectionBase *SectionTableRef::getSection(uint32_t Index, Twine ErrMsg) { +Expected SectionTableRef::getSection(uint32_t Index, + Twine ErrMsg) { if (Index == SHN_UNDEF || Index > Sections.size()) - error(ErrMsg); + return createStringError(errc::invalid_argument, ErrMsg); return Sections[Index - 1].get(); } template -T *SectionTableRef::getSectionOfType(uint32_t Index, Twine IndexErrMsg, - Twine TypeErrMsg) { - if (T *Sec = dyn_cast(getSection(Index, IndexErrMsg))) +Expected SectionTableRef::getSectionOfType(uint32_t Index, + Twine IndexErrMsg, + Twine TypeErrMsg) { + Expected BaseSec = getSection(Index, IndexErrMsg); + if (!BaseSec) + return BaseSec.takeError(); + + if (T *Sec = dyn_cast(*BaseSec)) return Sec; - error(TypeErrMsg); + + return createStringError(errc::invalid_argument, TypeErrMsg); } template -SectionBase &ELFBuilder::makeSection(const Elf_Shdr &Shdr) { - ArrayRef Data; +Expected ELFBuilder::makeSection(const Elf_Shdr &Shdr) { switch (Shdr.sh_type) { case SHT_REL: case SHT_RELA: if (Shdr.sh_flags & SHF_ALLOC) { - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - return Obj.addSection(Data); + if (Expected> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection(*Data); + else + return Data.takeError(); } return Obj.addSection(); case SHT_STRTAB: @@ -1485,25 +1680,35 @@ SectionBase &ELFBuilder::makeSection(const Elf_Shdr &Shdr) { // mean altering the memory image. There are no special link types or // anything so we can just use a Section. if (Shdr.sh_flags & SHF_ALLOC) { - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - return Obj.addSection
(Data); + if (Expected> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection
(*Data); + else + return Data.takeError(); } return Obj.addSection(); case SHT_HASH: case SHT_GNU_HASH: // Hash tables should refer to SHT_DYNSYM which we're not going to change. // Because of this we don't need to mess with the hash tables either. - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - return Obj.addSection
(Data); + if (Expected> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection
(*Data); + else + return Data.takeError(); case SHT_GROUP: - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - return Obj.addSection(Data); + if (Expected> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection(*Data); + else + return Data.takeError(); case SHT_DYNSYM: - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - return Obj.addSection(Data); + if (Expected> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection(*Data); + else + return Data.takeError(); case SHT_DYNAMIC: - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); - return Obj.addSection(Data); + if (Expected> Data = ElfFile.getSectionContents(Shdr)) + return Obj.addSection(*Data); + else + return Data.takeError(); case SHT_SYMTAB: { auto &SymTab = Obj.addSection(); Obj.SymbolTable = &SymTab; @@ -1515,114 +1720,175 @@ SectionBase &ELFBuilder::makeSection(const Elf_Shdr &Shdr) { return ShndxSection; } case SHT_NOBITS: - return Obj.addSection
(Data); + return Obj.addSection
(ArrayRef()); default: { - Data = unwrapOrError(ElfFile.getSectionContents(Shdr)); + Expected> Data = ElfFile.getSectionContents(Shdr); + if (!Data) + return Data.takeError(); + + Expected Name = ElfFile.getSectionName(Shdr); + if (!Name) + return Name.takeError(); - StringRef Name = unwrapOrError(ElfFile.getSectionName(Shdr)); - if (Name.startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) { + if (Name->startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) { uint64_t DecompressedSize, DecompressedAlign; std::tie(DecompressedSize, DecompressedAlign) = - getDecompressedSizeAndAlignment(Data); - return Obj.addSection(Data, DecompressedSize, - DecompressedAlign); + getDecompressedSizeAndAlignment(*Data); + Expected NewSection = + CompressedSection::create(*Data, DecompressedSize, DecompressedAlign); + if (!NewSection) + return NewSection.takeError(); + + return Obj.addSection(std::move(*NewSection)); } - return Obj.addSection
(Data); + return Obj.addSection
(*Data); } } } -template void ELFBuilder::readSectionHeaders() { +template Error ELFBuilder::readSectionHeaders() { uint32_t Index = 0; - for (const auto &Shdr : unwrapOrError(ElfFile.sections())) { + Expected::Elf_Shdr_Range> Sections = + ElfFile.sections(); + if (!Sections) + return Sections.takeError(); + + for (const typename ELFFile::Elf_Shdr &Shdr : *Sections) { if (Index == 0) { ++Index; continue; } - auto &Sec = makeSection(Shdr); - Sec.Name = std::string(unwrapOrError(ElfFile.getSectionName(Shdr))); - Sec.Type = Sec.OriginalType = Shdr.sh_type; - Sec.Flags = Sec.OriginalFlags = Shdr.sh_flags; - Sec.Addr = Shdr.sh_addr; - Sec.Offset = Shdr.sh_offset; - Sec.OriginalOffset = Shdr.sh_offset; - Sec.Size = Shdr.sh_size; - Sec.Link = Shdr.sh_link; - Sec.Info = Shdr.sh_info; - Sec.Align = Shdr.sh_addralign; - Sec.EntrySize = Shdr.sh_entsize; - Sec.Index = Index++; - Sec.OriginalData = + Expected Sec = makeSection(Shdr); + if (!Sec) + return Sec.takeError(); + + Expected SecName = ElfFile.getSectionName(Shdr); + if (!SecName) + return SecName.takeError(); + Sec->Name = SecName->str(); + Sec->Type = Sec->OriginalType = Shdr.sh_type; + Sec->Flags = Sec->OriginalFlags = Shdr.sh_flags; + Sec->Addr = Shdr.sh_addr; + Sec->Offset = Shdr.sh_offset; + Sec->OriginalOffset = Shdr.sh_offset; + Sec->Size = Shdr.sh_size; + Sec->Link = Shdr.sh_link; + Sec->Info = Shdr.sh_info; + Sec->Align = Shdr.sh_addralign; + Sec->EntrySize = Shdr.sh_entsize; + Sec->Index = Index++; + Sec->OriginalData = ArrayRef(ElfFile.base() + Shdr.sh_offset, (Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size); } + + return Error::success(); } -template void ELFBuilder::readSections(bool EnsureSymtab) { +template Error ELFBuilder::readSections(bool EnsureSymtab) { uint32_t ShstrIndex = ElfFile.getHeader().e_shstrndx; - if (ShstrIndex == SHN_XINDEX) - ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link; + if (ShstrIndex == SHN_XINDEX) { + Expected Sec = ElfFile.getSection(0); + if (!Sec) + return Sec.takeError(); + + ShstrIndex = (*Sec)->sh_link; + } if (ShstrIndex == SHN_UNDEF) Obj.HadShdrs = false; - else - Obj.SectionNames = + else { + Expected Sec = Obj.sections().template getSectionOfType( ShstrIndex, "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " + " is invalid", "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " + " does not reference a string table"); + if (!Sec) + return Sec.takeError(); + + Obj.SectionNames = *Sec; + } // If a section index table exists we'll need to initialize it before we // initialize the symbol table because the symbol table might need to // reference it. if (Obj.SectionIndexTable) - Obj.SectionIndexTable->initialize(Obj.sections()); + if (Error Err = Obj.SectionIndexTable->initialize(Obj.sections())) + return Err; // Now that all of the sections have been added we can fill out some extra // details about symbol tables. We need the symbol table filled out before // any relocations. if (Obj.SymbolTable) { - Obj.SymbolTable->initialize(Obj.sections()); - initSymbolTable(Obj.SymbolTable); + if (Error Err = Obj.SymbolTable->initialize(Obj.sections())) + return Err; + if (Error Err = initSymbolTable(Obj.SymbolTable)) + return Err; } else if (EnsureSymtab) { - Obj.addNewSymbolTable(); + if (Error Err = Obj.addNewSymbolTable()) + return Err; } // Now that all sections and symbols have been added we can add // relocations that reference symbols and set the link and info fields for // relocation sections. - for (auto &Sec : Obj.sections()) { + for (SectionBase &Sec : Obj.sections()) { if (&Sec == Obj.SymbolTable) continue; - Sec.initialize(Obj.sections()); + if (Error Err = Sec.initialize(Obj.sections())) + return Err; if (auto RelSec = dyn_cast(&Sec)) { - auto Shdr = unwrapOrError(ElfFile.sections()).begin() + RelSec->Index; - if (RelSec->Type == SHT_REL) - initRelocations(RelSec, Obj.SymbolTable, - unwrapOrError(ElfFile.rels(*Shdr))); - else - initRelocations(RelSec, Obj.SymbolTable, - unwrapOrError(ElfFile.relas(*Shdr))); + Expected::Elf_Shdr_Range> Sections = + ElfFile.sections(); + if (!Sections) + return Sections.takeError(); + + const typename ELFFile::Elf_Shdr *Shdr = + Sections->begin() + RelSec->Index; + if (RelSec->Type == SHT_REL) { + Expected::Elf_Rel_Range> Rels = + ElfFile.rels(*Shdr); + if (!Rels) + return Rels.takeError(); + + if (Error Err = initRelocations(RelSec, Obj.SymbolTable, *Rels)) + return Err; + } else { + Expected::Elf_Rela_Range> Relas = + ElfFile.relas(*Shdr); + if (!Relas) + return Relas.takeError(); + + if (Error Err = initRelocations(RelSec, Obj.SymbolTable, *Relas)) + return Err; + } } else if (auto GroupSec = dyn_cast(&Sec)) { - initGroupSection(GroupSec); + if (Error Err = initGroupSection(GroupSec)) + return Err; } } + + return Error::success(); } -template void ELFBuilder::build(bool EnsureSymtab) { - readSectionHeaders(); - findEhdrOffset(); +template Error ELFBuilder::build(bool EnsureSymtab) { + if (Error E = readSectionHeaders()) + return E; + if (Error E = findEhdrOffset()) + return E; // The ELFFile whose ELF headers and program headers are copied into the // output file. Normally the same as ElfFile, but if we're extracting a // loadable partition it will point to the partition's headers. - ELFFile HeadersFile = unwrapOrError(ELFFile::create(toStringRef( - {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset}))); + Expected> HeadersFile = ELFFile::create(toStringRef( + {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})); + if (!HeadersFile) + return HeadersFile.takeError(); - auto &Ehdr = HeadersFile.getHeader(); + const typename ELFFile::Elf_Ehdr &Ehdr = HeadersFile->getHeader(); Obj.OSABI = Ehdr.e_ident[EI_OSABI]; Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION]; Obj.Type = Ehdr.e_type; @@ -1631,15 +1897,17 @@ template void ELFBuilder::build(bool EnsureSymtab) { Obj.Entry = Ehdr.e_entry; Obj.Flags = Ehdr.e_flags; - readSections(EnsureSymtab); - readProgramHeaders(HeadersFile); + if (Error E = readSections(EnsureSymtab)) + return E; + return readProgramHeaders(*HeadersFile); } Writer::~Writer() {} Reader::~Reader() {} -std::unique_ptr BinaryReader::create(bool /*EnsureSymtab*/) const { +Expected> +BinaryReader::create(bool /*EnsureSymtab*/) const { return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build(); } @@ -1669,31 +1937,39 @@ Expected> IHexReader::parse() const { return std::move(Records); } -std::unique_ptr IHexReader::create(bool /*EnsureSymtab*/) const { - std::vector Records = unwrapOrError(parse()); - return IHexELFBuilder(Records).build(); +Expected> +IHexReader::create(bool /*EnsureSymtab*/) const { + Expected> Records = parse(); + if (!Records) + return Records.takeError(); + + return IHexELFBuilder(*Records).build(); } -std::unique_ptr ELFReader::create(bool EnsureSymtab) const { +Expected> ELFReader::create(bool EnsureSymtab) const { auto Obj = std::make_unique(); if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(EnsureSymtab); - return Obj; + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(EnsureSymtab); - return Obj; + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(EnsureSymtab); - return Obj; + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(EnsureSymtab); - return Obj; + if (Error Err = Builder.build(EnsureSymtab)) + return std::move(Err); + return std::move(Obj); } - error("invalid file type"); + return createStringError(errc::invalid_argument, "invalid file type"); } template void ELFWriter::writeEhdr() { @@ -1787,13 +2063,16 @@ template void ELFWriter::writeShdrs() { writeShdr(Sec); } -template void ELFWriter::writeSectionData() { +template Error ELFWriter::writeSectionData() { for (SectionBase &Sec : Obj.sections()) // Segments are responsible for writing their contents, so only write the // section data if the section is not in a segment. Note that this renders // sections in segments effectively immutable. if (Sec.ParentSegment == nullptr) - Sec.accept(*SecWriter); + if (Error Err = Sec.accept(*SecWriter)) + return Err; + + return Error::success(); } template void ELFWriter::writeSegmentData() { @@ -1880,7 +2159,7 @@ Error Object::removeSymbols(function_ref ToRemove) { return Error::success(); } -void Object::addNewSymbolTable() { +Error Object::addNewSymbolTable() { assert(!SymbolTable && "Object must not has a SymbolTable."); // Reuse an existing SHT_STRTAB section if it exists. @@ -1901,10 +2180,13 @@ void Object::addNewSymbolTable() { SymbolTableSection &SymTab = addSection(); SymTab.Name = ".symtab"; SymTab.Link = StrTab->Index; - SymTab.initialize(sections()); + if (Error Err = SymTab.initialize(sections())) + return Err; SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); SymbolTable = &SymTab; + + return Error::success(); } void Object::sortSections() { @@ -2118,7 +2400,8 @@ template Error ELFWriter::write() { writeSegmentData(); writeEhdr(); writePhdrs(); - writeSectionData(); + if (Error E = writeSectionData()) + return E; if (WriteSectionHeaders) writeShdrs(); return Buf.commit(); @@ -2207,7 +2490,8 @@ template Error ELFWriter::finalize() { auto SecSizer = std::make_unique>(); for (SectionBase &Sec : Obj.sections()) { Sec.Index = Index++; - Sec.accept(*SecSizer); + if (Error Err = Sec.accept(*SecSizer)) + return Err; } // The symbol table does not update all other sections on update. For @@ -2248,7 +2532,9 @@ template Error ELFWriter::finalize() { Error BinaryWriter::write() { for (const SectionBase &Sec : Obj.allocSections()) - Sec.accept(*SecWriter); + if (Error Err = Sec.accept(*SecWriter)) + return Err; + return Buf.commit(); } @@ -2320,7 +2606,8 @@ Error IHexWriter::write() { IHexSectionWriter Writer(Buf); // Write sections. for (const SectionBase *Sec : Sections) - Sec->accept(Writer); + if (Error Err = Sec->accept(Writer)) + return Err; uint64_t Offset = Writer.getBufferOffset(); // Write entry point address. @@ -2374,7 +2661,8 @@ Error IHexWriter::finalize() { IHexSectionWriterBase LengthCalc(Buf); for (const SectionBase *Sec : Sections) - Sec->accept(LengthCalc); + if (Error Err = Sec->accept(LengthCalc)) + return Err; // We need space to write section records + StartAddress record // (if start adress is not zero) + EndOfFile record. diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h index ed89e916b8385..8fee4e29e964d 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.h +++ b/llvm/tools/llvm-objcopy/ELF/Object.h @@ -61,10 +61,11 @@ class SectionTableRef { iterator end() const { return iterator(Sections.data() + Sections.size()); } size_t size() const { return Sections.size(); } - SectionBase *getSection(uint32_t Index, Twine ErrMsg); + Expected getSection(uint32_t Index, Twine ErrMsg); template - T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg); + Expected getSectionOfType(uint32_t Index, Twine IndexErrMsg, + Twine TypeErrMsg); }; enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; @@ -73,34 +74,34 @@ class SectionVisitor { public: virtual ~SectionVisitor() = default; - virtual void visit(const Section &Sec) = 0; - virtual void visit(const OwnedDataSection &Sec) = 0; - virtual void visit(const StringTableSection &Sec) = 0; - virtual void visit(const SymbolTableSection &Sec) = 0; - virtual void visit(const RelocationSection &Sec) = 0; - virtual void visit(const DynamicRelocationSection &Sec) = 0; - virtual void visit(const GnuDebugLinkSection &Sec) = 0; - virtual void visit(const GroupSection &Sec) = 0; - virtual void visit(const SectionIndexSection &Sec) = 0; - virtual void visit(const CompressedSection &Sec) = 0; - virtual void visit(const DecompressedSection &Sec) = 0; + virtual Error visit(const Section &Sec) = 0; + virtual Error visit(const OwnedDataSection &Sec) = 0; + virtual Error visit(const StringTableSection &Sec) = 0; + virtual Error visit(const SymbolTableSection &Sec) = 0; + virtual Error visit(const RelocationSection &Sec) = 0; + virtual Error visit(const DynamicRelocationSection &Sec) = 0; + virtual Error visit(const GnuDebugLinkSection &Sec) = 0; + virtual Error visit(const GroupSection &Sec) = 0; + virtual Error visit(const SectionIndexSection &Sec) = 0; + virtual Error visit(const CompressedSection &Sec) = 0; + virtual Error visit(const DecompressedSection &Sec) = 0; }; class MutableSectionVisitor { public: virtual ~MutableSectionVisitor() = default; - virtual void visit(Section &Sec) = 0; - virtual void visit(OwnedDataSection &Sec) = 0; - virtual void visit(StringTableSection &Sec) = 0; - virtual void visit(SymbolTableSection &Sec) = 0; - virtual void visit(RelocationSection &Sec) = 0; - virtual void visit(DynamicRelocationSection &Sec) = 0; - virtual void visit(GnuDebugLinkSection &Sec) = 0; - virtual void visit(GroupSection &Sec) = 0; - virtual void visit(SectionIndexSection &Sec) = 0; - virtual void visit(CompressedSection &Sec) = 0; - virtual void visit(DecompressedSection &Sec) = 0; + virtual Error visit(Section &Sec) = 0; + virtual Error visit(OwnedDataSection &Sec) = 0; + virtual Error visit(StringTableSection &Sec) = 0; + virtual Error visit(SymbolTableSection &Sec) = 0; + virtual Error visit(RelocationSection &Sec) = 0; + virtual Error visit(DynamicRelocationSection &Sec) = 0; + virtual Error visit(GnuDebugLinkSection &Sec) = 0; + virtual Error visit(GroupSection &Sec) = 0; + virtual Error visit(SectionIndexSection &Sec) = 0; + virtual Error visit(CompressedSection &Sec) = 0; + virtual Error visit(DecompressedSection &Sec) = 0; }; class SectionWriter : public SectionVisitor { @@ -110,17 +111,17 @@ class SectionWriter : public SectionVisitor { public: virtual ~SectionWriter() = default; - void visit(const Section &Sec) override; - void visit(const OwnedDataSection &Sec) override; - void visit(const StringTableSection &Sec) override; - void visit(const DynamicRelocationSection &Sec) override; - virtual void visit(const SymbolTableSection &Sec) override = 0; - virtual void visit(const RelocationSection &Sec) override = 0; - virtual void visit(const GnuDebugLinkSection &Sec) override = 0; - virtual void visit(const GroupSection &Sec) override = 0; - virtual void visit(const SectionIndexSection &Sec) override = 0; - virtual void visit(const CompressedSection &Sec) override = 0; - virtual void visit(const DecompressedSection &Sec) override = 0; + Error visit(const Section &Sec) override; + Error visit(const OwnedDataSection &Sec) override; + Error visit(const StringTableSection &Sec) override; + Error visit(const DynamicRelocationSection &Sec) override; + virtual Error visit(const SymbolTableSection &Sec) override = 0; + virtual Error visit(const RelocationSection &Sec) override = 0; + virtual Error visit(const GnuDebugLinkSection &Sec) override = 0; + virtual Error visit(const GroupSection &Sec) override = 0; + virtual Error visit(const SectionIndexSection &Sec) override = 0; + virtual Error visit(const CompressedSection &Sec) override = 0; + virtual Error visit(const DecompressedSection &Sec) override = 0; explicit SectionWriter(Buffer &Buf) : Out(Buf) {} }; @@ -134,13 +135,13 @@ template class ELFSectionWriter : public SectionWriter { public: virtual ~ELFSectionWriter() {} - void visit(const SymbolTableSection &Sec) override; - void visit(const RelocationSection &Sec) override; - void visit(const GnuDebugLinkSection &Sec) override; - void visit(const GroupSection &Sec) override; - void visit(const SectionIndexSection &Sec) override; - void visit(const CompressedSection &Sec) override; - void visit(const DecompressedSection &Sec) override; + Error visit(const SymbolTableSection &Sec) override; + Error visit(const RelocationSection &Sec) override; + Error visit(const GnuDebugLinkSection &Sec) override; + Error visit(const GroupSection &Sec) override; + Error visit(const SectionIndexSection &Sec) override; + Error visit(const CompressedSection &Sec) override; + Error visit(const DecompressedSection &Sec) override; explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; @@ -154,17 +155,17 @@ template class ELFSectionSizer : public MutableSectionVisitor { using Elf_Xword = typename ELFT::Xword; public: - void visit(Section &Sec) override; - void visit(OwnedDataSection &Sec) override; - void visit(StringTableSection &Sec) override; - void visit(DynamicRelocationSection &Sec) override; - void visit(SymbolTableSection &Sec) override; - void visit(RelocationSection &Sec) override; - void visit(GnuDebugLinkSection &Sec) override; - void visit(GroupSection &Sec) override; - void visit(SectionIndexSection &Sec) override; - void visit(CompressedSection &Sec) override; - void visit(DecompressedSection &Sec) override; + Error visit(Section &Sec) override; + Error visit(OwnedDataSection &Sec) override; + Error visit(StringTableSection &Sec) override; + Error visit(DynamicRelocationSection &Sec) override; + Error visit(SymbolTableSection &Sec) override; + Error visit(RelocationSection &Sec) override; + Error visit(GnuDebugLinkSection &Sec) override; + Error visit(GroupSection &Sec) override; + Error visit(SectionIndexSection &Sec) override; + Error visit(CompressedSection &Sec) override; + Error visit(DecompressedSection &Sec) override; }; #define MAKE_SEC_WRITER_FRIEND \ @@ -178,13 +179,13 @@ class BinarySectionWriter : public SectionWriter { public: virtual ~BinarySectionWriter() {} - void visit(const SymbolTableSection &Sec) override; - void visit(const RelocationSection &Sec) override; - void visit(const GnuDebugLinkSection &Sec) override; - void visit(const GroupSection &Sec) override; - void visit(const SectionIndexSection &Sec) override; - void visit(const CompressedSection &Sec) override; - void visit(const DecompressedSection &Sec) override; + Error visit(const SymbolTableSection &Sec) override; + Error visit(const RelocationSection &Sec) override; + Error visit(const GnuDebugLinkSection &Sec) override; + Error visit(const GroupSection &Sec) override; + Error visit(const SectionIndexSection &Sec) override; + Error visit(const CompressedSection &Sec) override; + Error visit(const DecompressedSection &Sec) override; explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; @@ -285,10 +286,10 @@ class IHexSectionWriterBase : public BinarySectionWriter { explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {} uint64_t getBufferOffset() const { return Offset; } - void visit(const Section &Sec) final; - void visit(const OwnedDataSection &Sec) final; - void visit(const StringTableSection &Sec) override; - void visit(const DynamicRelocationSection &Sec) final; + Error visit(const Section &Sec) final; + Error visit(const OwnedDataSection &Sec) final; + Error visit(const StringTableSection &Sec) override; + Error visit(const DynamicRelocationSection &Sec) final; using BinarySectionWriter::visit; }; @@ -298,7 +299,7 @@ class IHexSectionWriter : public IHexSectionWriterBase { IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {} void writeData(uint8_t Type, uint16_t Addr, ArrayRef Data) override; - void visit(const StringTableSection &Sec) override; + Error visit(const StringTableSection &Sec) override; }; class Writer { @@ -329,7 +330,7 @@ template class ELFWriter : public Writer { void writePhdrs(); void writeShdrs(); - void writeSectionData(); + Error writeSectionData(); void writeSegmentData(); void assignOffsets(); @@ -412,15 +413,15 @@ class SectionBase { virtual ~SectionBase() = default; - virtual void initialize(SectionTableRef SecTable); + virtual Error initialize(SectionTableRef SecTable); virtual void finalize(); // Remove references to these sections. The list of sections must be sorted. virtual Error removeSectionReferences(bool AllowBrokenLinks, function_ref ToRemove); virtual Error removeSymbols(function_ref ToRemove); - virtual void accept(SectionVisitor &Visitor) const = 0; - virtual void accept(MutableSectionVisitor &Visitor) = 0; + virtual Error accept(SectionVisitor &Visitor) const = 0; + virtual Error accept(MutableSectionVisitor &Visitor) = 0; virtual void markSymbols(); virtual void replaceSectionReferences(const DenseMap &); @@ -481,11 +482,11 @@ class Section : public SectionBase { public: explicit Section(ArrayRef Data) : Contents(Data) {} - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; Error removeSectionReferences(bool AllowBrokenLinks, function_ref ToRemove) override; - void initialize(SectionTableRef SecTable) override; + Error initialize(SectionTableRef SecTable) override; void finalize() override; }; @@ -513,8 +514,8 @@ class OwnedDataSection : public SectionBase { } void appendHexData(StringRef HexData); - void accept(SectionVisitor &Sec) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Sec) const override; + Error accept(MutableSectionVisitor &Visitor) override; }; class CompressedSection : public SectionBase { @@ -526,21 +527,28 @@ class CompressedSection : public SectionBase { SmallVector CompressedData; public: - CompressedSection(const SectionBase &Sec, - DebugCompressionType CompressionType); - CompressedSection(ArrayRef CompressedData, uint64_t DecompressedSize, - uint64_t DecompressedAlign); + static Expected + create(const SectionBase &Sec, DebugCompressionType CompressionType); + static Expected create(ArrayRef CompressedData, + uint64_t DecompressedSize, + uint64_t DecompressedAlign); uint64_t getDecompressedSize() const { return DecompressedSize; } uint64_t getDecompressedAlign() const { return DecompressedAlign; } - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; static bool classof(const SectionBase *S) { return (S->OriginalFlags & ELF::SHF_COMPRESSED) || (StringRef(S->Name).startswith(".zdebug")); } + +private: + CompressedSection(const SectionBase &Sec, + DebugCompressionType CompressionType, Error &Err); + CompressedSection(ArrayRef CompressedData, uint64_t DecompressedSize, + uint64_t DecompressedAlign); }; class DecompressedSection : public SectionBase { @@ -556,8 +564,8 @@ class DecompressedSection : public SectionBase { Name = "." + Name.substr(2); } - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; }; // There are two types of string tables that can exist, dynamic and not dynamic. @@ -581,8 +589,8 @@ class StringTableSection : public SectionBase { void addString(StringRef Name); uint32_t findIndex(StringRef Name) const; void prepareForLayout(); - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; static bool classof(const SectionBase *S) { if (S->OriginalFlags & ELF::SHF_ALLOC) @@ -647,10 +655,10 @@ class SectionIndexSection : public SectionBase { Size = NumSymbols * 4; } void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } - void initialize(SectionTableRef SecTable) override; + Error initialize(SectionTableRef SecTable) override; void finalize() override; - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; SectionIndexSection() { Name = ".symtab_shndx"; @@ -688,16 +696,16 @@ class SymbolTableSection : public SectionBase { const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } void fillShndxTable(); const SectionBase *getStrTab() const { return SymbolNames; } - const Symbol *getSymbolByIndex(uint32_t Index) const; - Symbol *getSymbolByIndex(uint32_t Index); + Expected getSymbolByIndex(uint32_t Index) const; + Expected getSymbolByIndex(uint32_t Index); void updateSymbols(function_ref Callable); Error removeSectionReferences(bool AllowBrokenLinks, function_ref ToRemove) override; - void initialize(SectionTableRef SecTable) override; + Error initialize(SectionTableRef SecTable) override; void finalize() override; - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; Error removeSymbols(function_ref ToRemove) override; void replaceSectionReferences( const DenseMap &FromTo) override; @@ -748,7 +756,7 @@ class RelocSectionWithSymtabBase : public RelocationSectionBase { SymTabType *Symbols = nullptr; public: - void initialize(SectionTableRef SecTable) override; + Error initialize(SectionTableRef SecTable) override; void finalize() override; }; @@ -760,8 +768,8 @@ class RelocationSection public: void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; Error removeSectionReferences(bool AllowBrokenLinks, function_ref ToRemove) override; Error removeSymbols(function_ref ToRemove) override; @@ -798,8 +806,8 @@ class GroupSection : public SectionBase { void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; } void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); } - void accept(SectionVisitor &) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &) const override; + Error accept(MutableSectionVisitor &Visitor) override; void finalize() override; Error removeSectionReferences( bool AllowBrokenLinks, @@ -843,8 +851,8 @@ class DynamicRelocationSection public: explicit DynamicRelocationSection(ArrayRef Data) : Contents(Data) {} - void accept(SectionVisitor &) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &) const override; + Error accept(MutableSectionVisitor &Visitor) override; Error removeSectionReferences( bool AllowBrokenLinks, function_ref ToRemove) override; @@ -868,14 +876,14 @@ class GnuDebugLinkSection : public SectionBase { public: // If we add this section from an external source we can use this ctor. explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC); - void accept(SectionVisitor &Visitor) const override; - void accept(MutableSectionVisitor &Visitor) override; + Error accept(SectionVisitor &Visitor) const override; + Error accept(MutableSectionVisitor &Visitor) override; }; class Reader { public: virtual ~Reader(); - virtual std::unique_ptr create(bool EnsureSymtab) const = 0; + virtual Expected> create(bool EnsureSymtab) const = 0; }; using object::Binary; @@ -891,7 +899,7 @@ class BasicELFBuilder { void initHeaderSegment(); StringTableSection *addStrTab(); SymbolTableSection *addSymTab(StringTableSection *StrTab); - void initSections(); + Error initSections(); public: BasicELFBuilder() : Obj(std::make_unique()) {} @@ -907,7 +915,7 @@ class BinaryELFBuilder : public BasicELFBuilder { : BasicELFBuilder(), MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} - std::unique_ptr build(); + Expected> build(); }; class IHexELFBuilder : public BasicELFBuilder { @@ -919,7 +927,7 @@ class IHexELFBuilder : public BasicELFBuilder { IHexELFBuilder(const std::vector &Records) : BasicELFBuilder(), Records(Records) {} - std::unique_ptr build(); + Expected> build(); }; template class ELFBuilder { @@ -934,13 +942,13 @@ template class ELFBuilder { Optional ExtractPartition; void setParentSegment(Segment &Child); - void readProgramHeaders(const ELFFile &HeadersFile); - void initGroupSection(GroupSection *GroupSec); - void initSymbolTable(SymbolTableSection *SymTab); - void readSectionHeaders(); - void readSections(bool EnsureSymtab); - void findEhdrOffset(); - SectionBase &makeSection(const Elf_Shdr &Shdr); + Error readProgramHeaders(const ELFFile &HeadersFile); + Error initGroupSection(GroupSection *GroupSec); + Error initSymbolTable(SymbolTableSection *SymTab); + Error readSectionHeaders(); + Error readSections(bool EnsureSymtab); + Error findEhdrOffset(); + Expected makeSection(const Elf_Shdr &Shdr); public: ELFBuilder(const ELFObjectFile &ElfObj, Object &Obj, @@ -948,7 +956,7 @@ template class ELFBuilder { : ElfFile(*ElfObj.getELFFile()), Obj(Obj), ExtractPartition(ExtractPartition) {} - void build(bool EnsureSymtab); + Error build(bool EnsureSymtab); }; class BinaryReader : public Reader { @@ -958,7 +966,7 @@ class BinaryReader : public Reader { public: BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} - std::unique_ptr create(bool EnsureSymtab) const override; + Expected> create(bool EnsureSymtab) const override; }; class IHexReader : public Reader { @@ -980,7 +988,7 @@ class IHexReader : public Reader { public: IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} - std::unique_ptr create(bool EnsureSymtab) const override; + Expected> create(bool EnsureSymtab) const override; }; class ELFReader : public Reader { @@ -988,7 +996,7 @@ class ELFReader : public Reader { Optional ExtractPartition; public: - std::unique_ptr create(bool EnsureSymtab) const override; + Expected> create(bool EnsureSymtab) const override; explicit ELFReader(Binary *B, Optional ExtractPartition) : Bin(B), ExtractPartition(ExtractPartition) {} }; @@ -1072,7 +1080,7 @@ class Object { Ptr->Index = Sections.size(); return *Ptr; } - void addNewSymbolTable(); + Error addNewSymbolTable(); Segment &addSegment(ArrayRef Data) { Segments.emplace_back(std::make_unique(Data)); return *Segments.back(); diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index 46c869d72c6eb..6e6975c351395 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "MachOReader.h" -#include "../llvm-objcopy.h" #include "Object.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/MachO.h" diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp index 6a89076bafcf4..4302904437fd5 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "Object.h" -#include "../llvm-objcopy.h" #include "llvm/ADT/SmallPtrSet.h" #include diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index ee882ffb2742e..175f2929eb230 100644 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm-objcopy.h" #include "Buffer.h" #include "COFF/COFFObjcopy.h" #include "CopyConfig.h" @@ -57,36 +56,6 @@ namespace objcopy { // The name this program was invoked as. StringRef ToolName; -LLVM_ATTRIBUTE_NORETURN void error(Twine Message) { - WithColor::error(errs(), ToolName) << Message << "\n"; - exit(1); -} - -LLVM_ATTRIBUTE_NORETURN void error(Error E) { - assert(E); - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(std::move(E), OS); - OS.flush(); - WithColor::error(errs(), ToolName) << Buf; - exit(1); -} - -LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, std::error_code EC) { - assert(EC); - error(createFileError(File, EC)); -} - -LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) { - assert(E); - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(std::move(E), OS); - OS.flush(); - WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf; - exit(1); -} - ErrorSuccess reportWarning(Error E) { assert(E); WithColor::warning(errs(), ToolName) << toString(std::move(E)) << '\n'; diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.h b/llvm/tools/llvm-objcopy/llvm-objcopy.h deleted file mode 100644 index 18a789ca1f83b..0000000000000 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H -#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H - -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/raw_ostream.h" -#include - -namespace llvm { -namespace objcopy { - -LLVM_ATTRIBUTE_NORETURN extern void error(Twine Message); -LLVM_ATTRIBUTE_NORETURN extern void error(Error E); -LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File, Error E); -LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File, - std::error_code EC); - -// This is taken from llvm-readobj. -// [see here](llvm/tools/llvm-readobj/llvm-readobj.h:38) -template T unwrapOrError(Expected EO) { - if (EO) - return *EO; - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(EO.takeError(), OS); - OS.flush(); - error(Buf); -} - -} // end namespace objcopy -} // end namespace llvm - -#endif // LLVM_TOOLS_OBJCOPY_OBJCOPY_H diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp index 20781cef2d33a..eb0e5635cef94 100644 --- a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp @@ -12,7 +12,6 @@ #include "Object.h" #include "Reader.h" #include "Writer.h" -#include "llvm-objcopy.h" #include "llvm/Support/Errc.h" namespace llvm { From f528816d586a42e4cf27af5b2fa9ba91213307aa Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 1 Oct 2020 11:01:07 -0500 Subject: [PATCH 423/544] [Hexagon] Move selection of HVX multiply from lowering to patterns Also, change i32*i32 to V6_vmpyieoh + V6_vmpyiewuh_acc, which works on V60 as well. --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 70 +------------------ llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 14 ++++ llvm/test/CodeGen/Hexagon/autohvx/arith.ll | 46 ++++-------- 3 files changed, 27 insertions(+), 103 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index e87ef08d8ed52..ee200b32ae771 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -91,6 +91,7 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::XOR, T, Legal); setOperationAction(ISD::ADD, T, Legal); setOperationAction(ISD::SUB, T, Legal); + setOperationAction(ISD::MUL, T, Legal); setOperationAction(ISD::CTPOP, T, Legal); setOperationAction(ISD::CTLZ, T, Legal); if (T != ByteV) { @@ -103,7 +104,6 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::LOAD, T, Custom); setOperationAction(ISD::MLOAD, T, Custom); setOperationAction(ISD::MSTORE, T, Custom); - setOperationAction(ISD::MUL, T, Custom); setOperationAction(ISD::MULHS, T, Custom); setOperationAction(ISD::MULHU, T, Custom); setOperationAction(ISD::BUILD_VECTOR, T, Custom); @@ -1444,73 +1444,6 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)}); } -SDValue -HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { - MVT ResTy = ty(Op); - assert(ResTy.isVector() && isHvxSingleTy(ResTy)); - const SDLoc &dl(Op); - SmallVector ShuffMask; - - MVT ElemTy = ResTy.getVectorElementType(); - unsigned VecLen = ResTy.getVectorNumElements(); - SDValue Vs = Op.getOperand(0); - SDValue Vt = Op.getOperand(1); - - switch (ElemTy.SimpleTy) { - case MVT::i8: { - // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), - // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, - // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). - MVT ExtTy = typeExtElem(ResTy, 2); - unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv - : Hexagon::V6_vmpyhv; - SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); - - // Discard high halves of the resulting values, collect the low halves. - for (unsigned I = 0; I < VecLen; I += 2) { - ShuffMask.push_back(I); // Pick even element. - ShuffMask.push_back(I+VecLen); // Pick odd element. - } - VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); - SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); - return DAG.getBitcast(ResTy, BS); - } - case MVT::i16: - // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode. - // (There is also V6_vmpyhv, which behaves in an analogous way to - // V6_vmpybv.) - return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG); - case MVT::i32: { - auto MulL_V60 = [&](SDValue Vs, SDValue Vt) { - // Use the following sequence for signed word multiply: - // T0 = V6_vmpyiowh Vs, Vt - // T1 = V6_vaslw T0, 16 - // T2 = V6_vmpyiewuh_acc T1, Vs, Vt - SDValue S16 = DAG.getConstant(16, dl, MVT::i32); - SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG); - SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG); - SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, - {T1, Vs, Vt}, DAG); - return T2; - }; - auto MulL_V62 = [&](SDValue Vs, SDValue Vt) { - MVT PairTy = typeJoin({ResTy, ResTy}); - SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, - {Vs, Vt}, DAG); - SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, - {T0, Vs, Vt}, DAG); - return opSplit(T1, dl, DAG).first; - }; - if (Subtarget.useHVXV62Ops()) - return MulL_V62(Vs, Vt); - return MulL_V60(Vs, Vt); - } - default: - break; - } - return SDValue(); -} - SDValue HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); @@ -2100,7 +2033,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerHvxShift(Op, DAG); - case ISD::MUL: return LowerHvxMul(Op, DAG); case ISD::MULHS: case ISD::MULHU: return LowerHvxMulh(Op, DAG); case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index b84c6eb27fe2a..c03e1c7925833 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -316,6 +316,20 @@ let Predicates = [UseHVX] in { (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } +let Predicates = [UseHVX] in { + // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), + // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, + // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). + def: Pat<(mul HVI8:$Vs, HVI8:$Vt), + (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)), + (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>; + def: Pat<(mul HVI16:$Vs, HVI16:$Vt), + (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(mul HVI32:$Vs, HVI32:$Vt), + (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt), + HvxVR:$Vs, HvxVR:$Vt)>; +} + let Predicates = [UseHVX] in { def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>; def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll index 99e287dce2144..348f3dd1df056 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll @@ -223,16 +223,16 @@ define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 { ; --- mul ; CHECK-LABEL: mpyb_64: -; CHECK: v[[H00:[0-9]+]]:[[L00:[0-9]+]].h = vmpy(v0.b,v1.b) -; CHECK: vshuffe(v[[H00]].b,v[[L00]].b) +; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b) +; CHECK: vshuffe(v[[H0]].b,v[[L0]].b) define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 { %p = mul <64 x i8> %v0, %v1 ret <64 x i8> %p } ; CHECK-LABEL: mpyb_128: -; CHECK: v[[H10:[0-9]+]]:[[L10:[0-9]+]].h = vmpy(v0.b,v1.b) -; CHECK: vshuffe(v[[H10]].b,v[[L10]].b) +; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b) +; CHECK: vshuffe(v[[H0]].b,v[[L0]].b) define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 { %p = mul <128 x i8> %v0, %v1 ret <128 x i8> %p @@ -252,43 +252,21 @@ define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 { ret <64 x i16> %p } -; CHECK-LABEL: mpyw_64_v60: -; CHECK-DAG: r[[T00:[0-9]+]] = #16 -; CHECK-DAG: v[[T01:[0-9]+]].w = vmpyio(v0.w,v1.h) -; CHECK: v[[T02:[0-9]+]].w = vasl(v[[T01]].w,r[[T00]]) -; CHECK: v[[T02]].w += vmpyie(v0.w,v1.uh) -define <16 x i32> @mpyw_64_v60(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK-LABEL: mpyw_64: +; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h) +; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh) +define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 { %p = mul <16 x i32> %v0, %v1 ret <16 x i32> %p } -; CHECK-LABEL: mpyw_128_v60: -; CHECK-DAG: r[[T10:[0-9]+]] = #16 -; CHECK-DAG: v[[T11:[0-9]+]].w = vmpyio(v0.w,v1.h) -; CHECK: v[[T12:[0-9]+]].w = vasl(v[[T11]].w,r[[T10]]) -; CHECK: v[[T12]].w += vmpyie(v0.w,v1.uh) -define <32 x i32> @mpyw_128_v60(<32 x i32> %v0, <32 x i32> %v1) #1 { - %p = mul <32 x i32> %v0, %v1 - ret <32 x i32> %p -} - -; CHECK-LABEL: mpyw_64_v62: -; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh) -; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h) -define <16 x i32> @mpyw_64_v62(<16 x i32> %v0, <16 x i32> %v1) #3 { - %p = mul <16 x i32> %v0, %v1 - ret <16 x i32> %p -} - -; CHECK-LABEL: mpyw_128_v62: -; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh) -; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h) -define <32 x i32> @mpyw_128_v62(<32 x i32> %v0, <32 x i32> %v1) #4 { +; CHECK-LABEL: mpyw_128: +; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h) +; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh) +define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 { %p = mul <32 x i32> %v0, %v1 ret <32 x i32> %p } attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } -attributes #3 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" } -attributes #4 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" } From f7e91e6cc7f35dd0dcc176463a355d78e1c020b1 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 2 Oct 2020 16:44:43 -0400 Subject: [PATCH 424/544] [libc++] Allow retries on some slightly flaky mutex tests --- .../thread.lock.unique.cons/mutex_try_to_lock.pass.cpp | 3 ++- .../thread.mutex.recursive/lock.pass.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp index 3c068d015251c..7aaf5a2955a4c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp @@ -5,8 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads +// ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp index 70061936bfd54..2225432aa3676 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp @@ -5,8 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads +// ALLOW_RETRIES: 2 // From a594fd28e373cb7cd348cf01f6a90e055bf6cf6d Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 22 Sep 2020 01:01:16 -0700 Subject: [PATCH 425/544] [Driver] Move detectLibcxxIncludePath to ToolChain This helper method is useful even outside of Gnu toolchains, so move it to ToolChain so it can be reused in other toolchains such as Fuchsia. Differential Revision: https://reviews.llvm.org/D88452 --- clang/include/clang/Driver/ToolChain.h | 3 +++ clang/lib/Driver/ToolChain.cpp | 23 +++++++++++++++++++++++ clang/lib/Driver/ToolChains/Fuchsia.cpp | 4 ++-- clang/lib/Driver/ToolChains/Gnu.cpp | 22 +--------------------- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 7495e08fe6e64..db4c4a7302325 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -575,6 +575,9 @@ class ToolChain { // given compilation arguments. virtual UnwindLibType GetUnwindLibType(const llvm::opt::ArgList &Args) const; + // Detect the highest available version of libc++ in base path. + virtual std::string detectLibcxxIncludePath(StringRef Base) const; + /// AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set /// the include paths to use for the given C++ standard library type. virtual void diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8991216da6765..8e98e32068808 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -924,6 +924,29 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs, } } +std::string ToolChain::detectLibcxxIncludePath(StringRef Base) const { + std::error_code EC; + int MaxVersion = 0; + std::string MaxVersionString; + for (llvm::vfs::directory_iterator LI = getVFS().dir_begin(Base, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef VersionText = llvm::sys::path::filename(LI->path()); + int Version; + if (VersionText[0] == 'v' && + !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { + if (Version > MaxVersion) { + MaxVersion = Version; + MaxVersionString = std::string(VersionText); + } + } + } + if (!MaxVersion) + return ""; + SmallString<128> P(Base); + llvm::sys::path::append(P, MaxVersionString); + return std::string(P.str()); +} + void ToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { // Header search paths should be handled by each of the subclasses. diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 781179be39a36..e5f23ee385559 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -319,8 +319,8 @@ void Fuchsia::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: { SmallString<128> P(getDriver().Dir); - llvm::sys::path::append(P, "..", "include", "c++", "v1"); - addSystemInclude(DriverArgs, CC1Args, P.str()); + llvm::sys::path::append(P, "..", "include", "c++"); + addSystemInclude(DriverArgs, CC1Args, detectLibcxxIncludePath(P.str())); break; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f3843685a522b..3778b6f297ed2 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2877,31 +2877,11 @@ void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, } } -static std::string DetectLibcxxIncludePath(llvm::vfs::FileSystem &vfs, - StringRef base) { - std::error_code EC; - int MaxVersion = 0; - std::string MaxVersionString; - for (llvm::vfs::directory_iterator LI = vfs.dir_begin(base, EC), LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef VersionText = llvm::sys::path::filename(LI->path()); - int Version; - if (VersionText[0] == 'v' && - !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { - if (Version > MaxVersion) { - MaxVersion = Version; - MaxVersionString = std::string(VersionText); - } - } - } - return MaxVersion ? (base + "/" + MaxVersionString).str() : ""; -} - void Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { auto AddIncludePath = [&](std::string Path) { - std::string IncludePath = DetectLibcxxIncludePath(getVFS(), Path); + std::string IncludePath = detectLibcxxIncludePath(Path); if (IncludePath.empty() || !getVFS().exists(IncludePath)) return false; addSystemInclude(DriverArgs, CC1Args, IncludePath); From ace644030e67506114d3ac9a221cf8eb5d10159c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 17 Aug 2017 18:57:00 +0300 Subject: [PATCH 426/544] [clang-tidy] Implement readability-function-cognitive-complexity check Currently, there is basically just one clang-tidy check to impose some sanity limits on functions - `clang-tidy-readability-function-size`. It is nice, allows to limit line count, total number of statements, number of branches, number of function parameters (not counting implicit `this`), nesting level. However, those are simple generic metrics. It is still trivially possible to write a function, which does not violate any of these metrics, yet is still rather unreadable. Thus, some additional, slightly more complicated metric is needed. There is a well-known [[ https://en.wikipedia.org/wiki/Cyclomatic_complexity | Cyclomatic complexity]], but certainly has its downsides. And there is a [[ https://www.sonarsource.com/docs/CognitiveComplexity.pdf | COGNITIVE COMPLEXITY by SonarSource ]], which is available for opensource on https://sonarcloud.io/. This check checks function Cognitive Complexity metric, and flags the functions with Cognitive Complexity exceeding the configured limit. The default limit is `25`, same as in 'upstream'. The metric is implemented as per [[ https://www.sonarsource.com/docs/CognitiveComplexity.pdf | COGNITIVE COMPLEXITY by SonarSource ]] specification version 1.2 (19 April 2017), with two notable exceptions: * `preprocessor conditionals` (`#ifdef`, `#if`, `#elif`, `#else`, `#endif`) are not accounted for. Could be done. Currently, upstream does not account for them either. * `each method in a recursion cycle` is not accounted for. It can't be fully implemented, because cross-translational-unit analysis would be needed, which is not possible in clang-tidy. Thus, at least right now, i completely avoided implementing it. There are some further possible improvements: * Are GNU statement expressions (`BinaryConditionalOperator`) really free? They should probably cause nesting level increase, and complexity level increase when they are nested within eachother. * Microsoft SEH support * ??? Reviewed By: aaron.ballman, JonasToth, lattner Differential Revision: https://reviews.llvm.org/D36836 --- .../clang-tidy/readability/CMakeLists.txt | 1 + .../FunctionCognitiveComplexityCheck.cpp | 542 +++++++++ .../FunctionCognitiveComplexityCheck.h | 43 + .../readability/ReadabilityTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 5 + .../docs/clang-tidy/checks/list.rst | 1 + ...dability-function-cognitive-complexity.rst | 146 +++ ...dability-function-cognitive-complexity.cpp | 1015 +++++++++++++++++ 8 files changed, 1756 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-function-cognitive-complexity.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-function-cognitive-complexity.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index 4539ab177ced1..ecf37b5b91570 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -12,6 +12,7 @@ add_clang_library(clangTidyReadabilityModule DeleteNullPointerCheck.cpp DeletedDefaultCheck.cpp ElseAfterReturnCheck.cpp + FunctionCognitiveComplexityCheck.cpp FunctionSizeCheck.cpp IdentifierNamingCheck.cpp ImplicitBoolConversionCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp new file mode 100644 index 0000000000000..548aec7543ac9 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp @@ -0,0 +1,542 @@ +//===--- FunctionCognitiveComplexityCheck.cpp - clang-tidy ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FunctionCognitiveComplexityCheck.h" +#include "../ClangTidyDiagnosticConsumer.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/Expr.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticIDs.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include +#include +#include + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace readability { +namespace { + +struct CognitiveComplexity final { + // Any increment is based on some combination of reasons. + // For details you can look at the Specification at + // https://www.sonarsource.com/docs/CognitiveComplexity.pdf + // or user-facing docs at + // http://clang.llvm.org/extra/clang-tidy/checks/readability-function-cognitive-complexity.html + // Here are all the possible reasons: + enum Criteria : uint8_t { + None = 0U, + + // B1, increases cognitive complexity (by 1) + // What causes it: + // * if, else if, else, ConditionalOperator (not BinaryConditionalOperator) + // * SwitchStmt + // * ForStmt, CXXForRangeStmt + // * WhileStmt, DoStmt + // * CXXCatchStmt + // * GotoStmt, IndirectGotoStmt (but not BreakStmt, ContinueStmt) + // * sequences of binary logical operators (BinOpLAnd, BinOpLOr) + // * each method in a recursion cycle (not implemented) + Increment = 1U << 0, + + // B2, increases current nesting level (by 1) + // What causes it: + // * if, else if, else, ConditionalOperator (not BinaryConditionalOperator) + // * SwitchStmt + // * ForStmt, CXXForRangeStmt + // * WhileStmt, DoStmt + // * CXXCatchStmt + // * nested CXXConstructor, CXXDestructor, CXXMethod (incl. C++11 Lambda) + // * GNU Statement Expression + // * Apple Block declaration + IncrementNesting = 1U << 1, + + // B3, increases cognitive complexity by the current nesting level + // Applied before IncrementNesting + // What causes it: + // * IfStmt, ConditionalOperator (not BinaryConditionalOperator) + // * SwitchStmt + // * ForStmt, CXXForRangeStmt + // * WhileStmt, DoStmt + // * CXXCatchStmt + PenalizeNesting = 1U << 2, + + All = Increment | PenalizeNesting | IncrementNesting, + }; + + // The helper struct used to record one increment occurrence, with all the + // details nessesary. + struct Detail { + const SourceLocation Loc; // What caused the increment? + const unsigned short Nesting; // How deeply nested is Loc located? + const Criteria C; // The criteria of the increment + + Detail(SourceLocation SLoc, unsigned short CurrentNesting, Criteria Crit) + : Loc(SLoc), Nesting(CurrentNesting), C(Crit) {} + + // To minimize the sizeof(Detail), we only store the minimal info there. + // This function is used to convert from the stored info into the usable + // information - what message to output, how much of an increment did this + // occurrence actually result in. + std::pair process() const { + assert(C != Criteria::None && "invalid criteria"); + + unsigned MsgId; // The id of the message to output. + unsigned short Increment; // How much of an increment? + + if (C == Criteria::All) { + Increment = 1 + Nesting; + MsgId = 0; + } else if (C == (Criteria::Increment | Criteria::IncrementNesting)) { + Increment = 1; + MsgId = 1; + } else if (C == Criteria::Increment) { + Increment = 1; + MsgId = 2; + } else if (C == Criteria::IncrementNesting) { + Increment = 0; // Unused in this message. + MsgId = 3; + } else + llvm_unreachable("should not get to here."); + + return std::make_pair(MsgId, Increment); + } + }; + + // Limit of 25 is the "upstream"'s default. + static constexpr unsigned DefaultLimit = 25U; + + // Based on the publicly-avaliable numbers for some big open-source projects + // https://sonarcloud.io/projects?languages=c%2Ccpp&size=5 we can estimate: + // value ~20 would result in no allocs for 98% of functions, ~12 for 96%, ~10 + // for 91%, ~8 for 88%, ~6 for 84%, ~4 for 77%, ~2 for 64%, and ~1 for 37%. + static_assert(sizeof(Detail) <= 8, + "Since we use SmallVector to minimize the amount of " + "allocations, we also need to consider the price we pay for " + "that in terms of stack usage. " + "Thus, it is good to minimize the size of the Detail struct."); + SmallVector Details; // 25 elements is 200 bytes. + // Yes, 25 is a magic number. This is the seemingly-sane default for the + // upper limit for function cognitive complexity. Thus it would make sense + // to avoid allocations for any function that does not violate the limit. + + // The grand total Cognitive Complexity of the function. + unsigned Total = 0; + + // The function used to store new increment, calculate the total complexity. + void account(SourceLocation Loc, unsigned short Nesting, Criteria C); +}; + +// All the possible messages that can be output. The choice of the message +// to use is based of the combination of the CognitiveComplexity::Criteria. +// It would be nice to have it in CognitiveComplexity struct, but then it is +// not static. +static const std::array Msgs = {{ + // B1 + B2 + B3 + "+%0, including nesting penalty of %1, nesting level increased to %2", + + // B1 + B2 + "+%0, nesting level increased to %2", + + // B1 + "+%0", + + // B2 + "nesting level increased to %2", +}}; + +// Criteria is a bitset, thus a few helpers are needed. +CognitiveComplexity::Criteria operator|(CognitiveComplexity::Criteria LHS, + CognitiveComplexity::Criteria RHS) { + return static_cast( + static_cast::type>( + LHS) | + static_cast::type>( + RHS)); +} +CognitiveComplexity::Criteria operator&(CognitiveComplexity::Criteria LHS, + CognitiveComplexity::Criteria RHS) { + return static_cast( + static_cast::type>( + LHS) & + static_cast::type>( + RHS)); +} +CognitiveComplexity::Criteria &operator|=(CognitiveComplexity::Criteria &LHS, + CognitiveComplexity::Criteria RHS) { + LHS = operator|(LHS, RHS); + return LHS; +} +CognitiveComplexity::Criteria &operator&=(CognitiveComplexity::Criteria &LHS, + CognitiveComplexity::Criteria RHS) { + LHS = operator&(LHS, RHS); + return LHS; +} + +void CognitiveComplexity::account(SourceLocation Loc, unsigned short Nesting, + Criteria C) { + C &= Criteria::All; + assert(C != Criteria::None && "invalid criteria"); + + Details.emplace_back(Loc, Nesting, C); + const Detail &D = Details.back(); + + unsigned MsgId; + unsigned short Increase; + std::tie(MsgId, Increase) = D.process(); + + Total += Increase; +} + +class FunctionASTVisitor final + : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + + // The current nesting level (increased by Criteria::IncrementNesting). + unsigned short CurrentNestingLevel = 0; + + // Used to efficiently know the last type of the binary sequence operator + // that was encountered. It would make sense for the function call to start + // the new sequence, thus it is a stack. + using OBO = Optional; + std::stack> BinaryOperatorsStack; + +public: + bool TraverseStmtWithIncreasedNestingLevel(Stmt *Node) { + ++CurrentNestingLevel; + bool ShouldContinue = Base::TraverseStmt(Node); + --CurrentNestingLevel; + return ShouldContinue; + } + + bool TraverseDeclWithIncreasedNestingLevel(Decl *Node) { + ++CurrentNestingLevel; + bool ShouldContinue = Base::TraverseDecl(Node); + --CurrentNestingLevel; + return ShouldContinue; + } + + bool TraverseIfStmt(IfStmt *Node, bool InElseIf = false) { + if (!Node) + return Base::TraverseIfStmt(Node); + + { + CognitiveComplexity::Criteria Reasons; + + Reasons = CognitiveComplexity::Criteria::None; + + // "If" increases cognitive complexity. + Reasons |= CognitiveComplexity::Criteria::Increment; + // "If" increases nesting level. + Reasons |= CognitiveComplexity::Criteria::IncrementNesting; + + if (!InElseIf) { + // "If" receives a nesting increment commensurate with it's nested + // depth, if it is not part of "else if". + Reasons |= CognitiveComplexity::Criteria::PenalizeNesting; + } + + CC.account(Node->getIfLoc(), CurrentNestingLevel, Reasons); + } + + // If this IfStmt is *NOT* "else if", then only the body (i.e. "Then" and + // "Else") is traversed with increased Nesting level. + // However if this IfStmt *IS* "else if", then Nesting level is increased + // for the whole IfStmt (i.e. for "Init", "Cond", "Then" and "Else"). + + if (!InElseIf) { + if (!TraverseStmt(Node->getInit())) + return false; + + if (!TraverseStmt(Node->getCond())) + return false; + } else { + if (!TraverseStmtWithIncreasedNestingLevel(Node->getInit())) + return false; + + if (!TraverseStmtWithIncreasedNestingLevel(Node->getCond())) + return false; + } + + // "Then" always increases nesting level. + if (!TraverseStmtWithIncreasedNestingLevel(Node->getThen())) + return false; + + if (!Node->getElse()) + return true; + + if (auto *E = dyn_cast(Node->getElse())) + return TraverseIfStmt(E, true); + + { + CognitiveComplexity::Criteria Reasons; + + Reasons = CognitiveComplexity::Criteria::None; + + // "Else" increases cognitive complexity. + Reasons |= CognitiveComplexity::Criteria::Increment; + // "Else" increases nesting level. + Reasons |= CognitiveComplexity::Criteria::IncrementNesting; + // "Else" DOES NOT receive a nesting increment commensurate with it's + // nested depth. + + CC.account(Node->getElseLoc(), CurrentNestingLevel, Reasons); + } + + // "Else" always increases nesting level. + return TraverseStmtWithIncreasedNestingLevel(Node->getElse()); + } + +// The currently-being-processed stack entry, which is always the top. +#define CurrentBinaryOperator BinaryOperatorsStack.top() + + // In a sequence of binary logical operators, if the new operator is different + // from the previous one, then the cognitive complexity is increased. + bool TraverseBinaryOperator(BinaryOperator *Op) { + if (!Op || !Op->isLogicalOp()) + return Base::TraverseBinaryOperator(Op); + + // Make sure that there is always at least one frame in the stack. + if (BinaryOperatorsStack.empty()) + BinaryOperatorsStack.emplace(); + + // If this is the first binary operator that we are processing, or the + // previous binary operator was different, there is an increment. + if (!CurrentBinaryOperator || Op->getOpcode() != CurrentBinaryOperator) + CC.account(Op->getOperatorLoc(), CurrentNestingLevel, + CognitiveComplexity::Criteria::Increment); + + // We might encounter a function call, which starts a new sequence, thus + // we need to save the current previous binary operator. + const Optional BinOpCopy(CurrentBinaryOperator); + + // Record the operator that we are currently processing and traverse it. + CurrentBinaryOperator = Op->getOpcode(); + bool ShouldContinue = Base::TraverseBinaryOperator(Op); + + // And restore the previous binary operator, which might be nonexistent. + CurrentBinaryOperator = BinOpCopy; + + return ShouldContinue; + } + + // It would make sense for the function call to start the new binary + // operator sequence, thus let's make sure that it creates a new stack frame. + bool TraverseCallExpr(CallExpr *Node) { + // If we are not currently processing any binary operator sequence, then + // no Node-handling is needed. + if (!Node || BinaryOperatorsStack.empty() || !CurrentBinaryOperator) + return Base::TraverseCallExpr(Node); + + // Else, do add [uninitialized] frame to the stack, and traverse call. + BinaryOperatorsStack.emplace(); + bool ShouldContinue = Base::TraverseCallExpr(Node); + // And remove the top frame. + BinaryOperatorsStack.pop(); + + return ShouldContinue; + } + +#undef CurrentBinaryOperator + + bool TraverseStmt(Stmt *Node) { + if (!Node) + return Base::TraverseStmt(Node); + + // Three following switch()'es have huge duplication, but it is better to + // keep them separate, to simplify comparing them with the Specification. + + CognitiveComplexity::Criteria Reasons = CognitiveComplexity::Criteria::None; + SourceLocation Location = Node->getBeginLoc(); + + // B1. Increments + // There is an increment for each of the following: + switch (Node->getStmtClass()) { + // if, else if, else are handled in TraverseIfStmt(), + // FIXME: "each method in a recursion cycle" Increment is not implemented. + case Stmt::ConditionalOperatorClass: + case Stmt::SwitchStmtClass: + case Stmt::ForStmtClass: + case Stmt::CXXForRangeStmtClass: + case Stmt::WhileStmtClass: + case Stmt::DoStmtClass: + case Stmt::CXXCatchStmtClass: + case Stmt::GotoStmtClass: + case Stmt::IndirectGotoStmtClass: + Reasons |= CognitiveComplexity::Criteria::Increment; + break; + default: + // break LABEL, continue LABEL increase cognitive complexity, + // but they are not supported in C++ or C. + // Regular break/continue do not increase cognitive complexity. + break; + } + + // B2. Nesting level + // The following structures increment the nesting level: + switch (Node->getStmtClass()) { + // if, else if, else are handled in TraverseIfStmt(), + // Nested methods and such are handled in TraverseDecl. + case Stmt::ConditionalOperatorClass: + case Stmt::SwitchStmtClass: + case Stmt::ForStmtClass: + case Stmt::CXXForRangeStmtClass: + case Stmt::WhileStmtClass: + case Stmt::DoStmtClass: + case Stmt::CXXCatchStmtClass: + case Stmt::LambdaExprClass: + case Stmt::StmtExprClass: + Reasons |= CognitiveComplexity::Criteria::IncrementNesting; + break; + default: + break; + } + + // B3. Nesting increments + // The following structures receive a nesting increment + // commensurate with their nested depth inside B2 structures: + switch (Node->getStmtClass()) { + // if, else if, else are handled in TraverseIfStmt(). + case Stmt::ConditionalOperatorClass: + case Stmt::SwitchStmtClass: + case Stmt::ForStmtClass: + case Stmt::CXXForRangeStmtClass: + case Stmt::WhileStmtClass: + case Stmt::DoStmtClass: + case Stmt::CXXCatchStmtClass: + Reasons |= CognitiveComplexity::Criteria::PenalizeNesting; + break; + default: + break; + } + + if (Node->getStmtClass() == Stmt::ConditionalOperatorClass) { + // A little beautification. + // For conditional operator "cond ? true : false" point at the "?" + // symbol. + ConditionalOperator *COp = dyn_cast(Node); + Location = COp->getQuestionLoc(); + } + + // If we have found any reasons, let's account it. + if (Reasons & CognitiveComplexity::Criteria::All) + CC.account(Location, CurrentNestingLevel, Reasons); + + // Did we decide that the nesting level should be increased? + if (!(Reasons & CognitiveComplexity::Criteria::IncrementNesting)) + return Base::TraverseStmt(Node); + + return TraverseStmtWithIncreasedNestingLevel(Node); + } + + // The parameter MainAnalyzedFunction is needed to differentiate between the + // cases where TraverseDecl() is the entry point from + // FunctionCognitiveComplexityCheck::check() and the cases where it was called + // from the FunctionASTVisitor itself. Explanation: if we get a function + // definition (e.g. constructor, destructor, method), the Cognitive Complexity + // specification states that the Nesting level shall be increased. But if this + // function is the entry point, then the Nesting level should not be + // increased. Thus that parameter is there and is used to fall-through + // directly to traversing if this is the main function that is being analyzed. + bool TraverseDecl(Decl *Node, bool MainAnalyzedFunction = false) { + if (!Node || MainAnalyzedFunction) + return Base::TraverseDecl(Node); + + // B2. Nesting level + // The following structures increment the nesting level: + switch (Node->getKind()) { + case Decl::Function: + case Decl::CXXMethod: + case Decl::CXXConstructor: + case Decl::CXXDestructor: + case Decl::Block: + break; + default: + // If this is something else, we use early return! + return Base::TraverseDecl(Node); + break; + } + + CC.account(Node->getBeginLoc(), CurrentNestingLevel, + CognitiveComplexity::Criteria::IncrementNesting); + + return TraverseDeclWithIncreasedNestingLevel(Node); + } + + CognitiveComplexity CC; +}; + +} // namespace + +FunctionCognitiveComplexityCheck::FunctionCognitiveComplexityCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + Threshold(Options.get("Threshold", CognitiveComplexity::DefaultLimit)) {} + +void FunctionCognitiveComplexityCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "Threshold", Threshold); +} + +void FunctionCognitiveComplexityCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher( + functionDecl( + allOf(isDefinition(), unless(anyOf(isDefaulted(), isDeleted(), + isImplicit(), isInstantiated())))) + .bind("func"), + this); +} + +void FunctionCognitiveComplexityCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *Func = Result.Nodes.getNodeAs("func"); + assert(Func->hasBody() && "The matchers should only match the functions that " + "have user-provided body."); + + FunctionASTVisitor Visitor; + Visitor.TraverseDecl(const_cast(Func), true); + + if (Visitor.CC.Total <= Threshold) + return; + + diag(Func->getLocation(), + "function %0 has cognitive complexity of %1 (threshold %2)") + << Func << Visitor.CC.Total << Threshold; + + // Output all the basic increments of complexity. + for (const auto &Detail : Visitor.CC.Details) { + unsigned MsgId; // The id of the message to output. + unsigned short Increase; // How much of an increment? + std::tie(MsgId, Increase) = Detail.process(); + assert(MsgId < Msgs.size() && "MsgId should always be valid"); + // Increase, on the other hand, can be 0. + + diag(Detail.Loc, Msgs[MsgId], DiagnosticIDs::Note) + << Increase << Detail.Nesting << 1 + Detail.Nesting; + } +} + +} // namespace readability +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h new file mode 100644 index 0000000000000..96b6723d2a6a5 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h @@ -0,0 +1,43 @@ +//===--- FunctionCognitiveComplexityCheck.h - clang-tidy --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_FUNCTIONCOGNITIVECOMPLEXITYCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_FUNCTIONCOGNITIVECOMPLEXITYCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace readability { + +/// Checks function Cognitive Complexity metric. +/// +/// There is only one configuration option: +/// +/// * `Threshold` - flag functions with Cognitive Complexity exceeding +/// this number. The default is `25`. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/readability-function-cognitive-complexity.html +class FunctionCognitiveComplexityCheck : public ClangTidyCheck { +public: + FunctionCognitiveComplexityCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + const unsigned Threshold; +}; + +} // namespace readability +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_FUNCTIONCOGNITIVECOMPLEXITYCHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 5ff5e20228394..bbd2e24e503b6 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -17,6 +17,7 @@ #include "DeleteNullPointerCheck.h" #include "DeletedDefaultCheck.h" #include "ElseAfterReturnCheck.h" +#include "FunctionCognitiveComplexityCheck.h" #include "FunctionSizeCheck.h" #include "IdentifierNamingCheck.h" #include "ImplicitBoolConversionCheck.h" @@ -70,6 +71,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-deleted-default"); CheckFactories.registerCheck( "readability-else-after-return"); + CheckFactories.registerCheck( + "readability-function-cognitive-complexity"); CheckFactories.registerCheck( "readability-function-size"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index da4b57f39a784..ac4802e6d498c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -106,6 +106,11 @@ New checks Finds condition variables in nested ``if`` statements that were also checked in the outer ``if`` statement and were not changed. +- New :doc:`readability-function-cognitive-complexity + ` check. + + Flags functions with Cognitive Complexity metric exceeding the configured limit. + Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 378e92cb66ddc..ec0e200b91d19 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -276,6 +276,7 @@ Clang-Tidy Checks `readability-delete-null-pointer `_, "Yes" `readability-deleted-default `_, `readability-else-after-return `_, "Yes" + `readability-function-cognitive-complexity `_, `readability-function-size `_, `readability-identifier-naming `_, "Yes" `readability-implicit-bool-conversion `_, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-function-cognitive-complexity.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-function-cognitive-complexity.rst new file mode 100644 index 0000000000000..b863357a21326 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-function-cognitive-complexity.rst @@ -0,0 +1,146 @@ +.. title:: clang-tidy - readability-function-cognitive-complexity + +readability-function-cognitive-complexity +========================================= + +Checks function Cognitive Complexity metric. + +The metric is implemented as per the `COGNITIVE COMPLEXITY by SonarSource +`_ specification +version 1.2 (19 April 2017). + +Options +------- + +.. option:: Threshold + + Flag functions with Cognitive Complexity exceeding this number. + The default is `25`. + +Building blocks +--------------- + +There are three basic building blocks of a Cognitive Complexity metric: + +Increment +^^^^^^^^^ + +The following structures increase the function's Cognitive Complexity metric +(by `1`): + +* Conditional operators: + + - ``if()`` + - ``else if()`` + - ``else`` + - ``cond ? true : false`` + +* ``switch()`` +* Loops: + + - ``for()`` + - C++11 range-based ``for()`` + - ``while()`` + - ``do while()`` + +* ``catch ()`` +* ``goto LABEL``, ``goto *(&&LABEL))``, +* sequences of binary logical operators: + + - ``boolean1 || boolean2`` + - ``boolean1 && boolean2`` + +Nesting level +^^^^^^^^^^^^^ + +While by itself the nesting level not change the function's Cognitive Complexity +metric, it is tracked, and is used by the next, third building block. +The following structures increase the nesting level (by `1`): + +* Conditional operators: + + - ``if()`` + - ``else if()`` + - ``else`` + - ``cond ? true : false`` + +* ``switch()`` +* Loops: + + - ``for()`` + - C++11 range-based ``for()`` + - ``while()`` + - ``do while()`` + +* ``catch ()`` +* Nested functions: + + - C++11 Lambda + - Nested ``class`` + - Nested ``struct`` +* GNU statement expression +* Apple Block Declaration + +Nesting increment +^^^^^^^^^^^^^^^^^ + +This is where the previous basic building block, `Nesting level`_, matters. +The following structures increase the function's Cognitive Complexity metric by +the current `Nesting level`_: + +* Conditional operators: + + - ``if()`` + - ``cond ? true : false`` + +* ``switch()`` +* Loops: + + - ``for()`` + - C++11 range-based ``for()`` + - ``while()`` + - ``do while()`` + +* ``catch ()`` + +Examples +-------- + +The simplest case. This function has Cognitive Complexity of `0`. + +.. code-block:: c++ + + void function0() {} + +Slightly better example. This function has Cognitive Complexity of `1`. + +.. code-block:: c++ + + int function1(bool var) { + if(var) // +1, nesting level +1 + return 42; + return 0; + } + +Full example. This function has Cognitive Complexity of `3`. + +.. code-block:: c++ + + int function3(bool var1, bool var2) { + if(var1) { // +1, nesting level +1 + if(var2) // +2 (1 + current nesting level of 1), nesting level +1 + return 42; + } + + return 0; + } + +Limitations +----------- + +The metric is implemented with two notable exceptions: + * `preprocessor conditionals` (``#ifdef``, ``#if``, ``#elif``, ``#else``, + ``#endif``) are not accounted for. + * `each method in a recursion cycle` is not accounted for. It can't be fully + implemented, because cross-translational-unit analysis would be needed, + which is currently not possible in clang-tidy. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-function-cognitive-complexity.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-function-cognitive-complexity.cpp new file mode 100644 index 0000000000000..431540c6ee96b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-function-cognitive-complexity.cpp @@ -0,0 +1,1015 @@ +// RUN: %check_clang_tidy %s readability-function-cognitive-complexity %t -- -config='{CheckOptions: [{key: readability-function-cognitive-complexity.Threshold, value: 0}]}' -- -std=c++11 -fblocks -w + +// any function should be checked. + +extern int ext_func(int x = 0); + +int some_func(int x = 0); + +static int some_other_func(int x = 0) {} + +template void some_templ_func(T x = 0) {} + +class SomeClass { +public: + int *begin(int x = 0); + int *end(int x = 0); + static int func(int x = 0); + template void some_templ_func(T x = 0) {} + SomeClass() = default; + SomeClass(SomeClass&) = delete; +}; + +// nothing ever decreases cognitive complexity, so we can check all the things +// in one go. none of the following should increase cognitive complexity: +void unittest_false() { + {}; + ext_func(); + some_func(); + some_other_func(); + some_templ_func(); + some_templ_func(); + SomeClass::func(); + SomeClass C; + C.some_templ_func(); + C.some_templ_func(); + C.func(); + C.end(); + int i = some_func(); + i = i; + i++; + --i; + i < 0; + int j = 0 ?: 1; + auto k = new int; + delete k; + throw i; + { + throw i; + } +end: + return; +} + +#if 1 +#define CC100 +#else +// this macro has cognitive complexity of 100. +// it is needed to be able to compare the testcases with the +// reference Sonar implementation. please place it right after the first +// CHECK-NOTES in each function +#define CC100 if(1){if(1){if(1){if(1){if(1){if(1){if(1){if(1){if(1){if(1){if(1){if(1){if(1){}}}}}if(1){}}}}}}}}} +#endif + +//----------------------------------------------------------------------------// +//------------------------------ B1. Increments ------------------------------// +//----------------------------------------------------------------------------// +// Check that every thing listed in B1 of the specification does indeed // +// recieve the base increment, and that not-body does not increase nesting // +//----------------------------------------------------------------------------// + +// break does not increase cognitive complexity. +// only break LABEL does, but it is unavaliable in C or C++ + +// continue does not increase cognitive complexity. +// only continue LABEL does, but it is unavaliable in C or C++ + +void unittest_b1_00() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_00' has cognitive complexity of 33 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:9: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + + if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:11: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } else if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:12: note: +1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:18: note: +3, including nesting penalty of 2, nesting level increased to 3{{$}} + } else { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, nesting level increased to 2{{$}} + } + } else if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:10: note: +1, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:16: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + + if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:11: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } else if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:12: note: +1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:18: note: +3, including nesting penalty of 2, nesting level increased to 3{{$}} + } else { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, nesting level increased to 2{{$}} + } + } else { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, nesting level increased to 1{{$}} + + if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:11: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } else if (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:12: note: +1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:18: note: +3, including nesting penalty of 2, nesting level increased to 3{{$}} + } else { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b1_01() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_01' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + int i = (1 ? 1 : 0) ? 1 : 0; +// CHECK-NOTES: :[[@LINE-1]]:23: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:14: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +} + +void unittest_b1_02(int x) { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_02' has cognitive complexity of 9 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + switch (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:13: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + case -1: + return; + case 1 ? 1 : 0: +// CHECK-NOTES: :[[@LINE-1]]:10: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + return; + case (1 ? 2 : 0) ... (1 ? 3 : 0): +// CHECK-NOTES: :[[@LINE-1]]:11: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-2]]:27: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + return; + default: + break; + } +} + +void unittest_b1_03(int x) { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_03' has cognitive complexity of 7 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + for (x = 1 ? 1 : 0; x < (1 ? 1 : 0); x += 1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:14: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-3]]:30: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-4]]:47: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + break; + continue; + } +} + +void unittest_b1_04() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_04' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + SomeClass C; + for (int i : (1 ? C : C)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:19: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + break; + continue; + } +} + +void unittest_b1_05() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_05' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + while (1 ? 1 : 0) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:12: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + break; + continue; + } +} + +void unittest_b1_06() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_06' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + do { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + break; + continue; + } while (1 ? 1 : 0); +// CHECK-NOTES: :[[@LINE-1]]:14: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +} + +void unittest_b1_07() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_07' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } +} + +void unittest_b1_08_00() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_08_00' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + goto end; +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1{{$}} +end: + return; +} + +void unittest_b1_08_01() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_08_01' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + void *ptr = &&end; + goto *ptr; +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1{{$}} +end: + return; +} + +void unittest_b1_09_00() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_09_00' has cognitive complexity of 34 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if(1 && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} + } + if(1 && 1 && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:13: note: +1{{$}} + } + if((1 && 1) && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:15: note: +1{{$}} + } + if(1 && (1 && 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} + } + + if(1 && 1 || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:13: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:8: note: +1{{$}} + } + if((1 && 1) || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:15: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:9: note: +1{{$}} + } + if(1 && (1 || 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:14: note: +1{{$}} + } + + if(1 || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} + } + if(1 || 1 || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:13: note: +1{{$}} + } + if((1 || 1) || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:15: note: +1{{$}} + } + if(1 || (1 || 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} + } + + if(1 || 1 && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:13: note: +1{{$}} + } + if((1 || 1) && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:15: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:9: note: +1{{$}} + } + if(1 || (1 && 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:14: note: +1{{$}} + } +} + +void unittest_b1_09_01() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_09_01' has cognitive complexity of 40 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if(1 && some_func(1 && 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + if(1 && some_func(1 || 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + if(1 || some_func(1 || 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + if(1 || some_func(1 && 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + + if(1 && some_func(1 && 1) && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + if(1 && some_func(1 || 1) && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + if(1 || some_func(1 || 1) && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-4]]:23: note: +1{{$}} + } + if(1 || some_func(1 && 1) && 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-4]]:23: note: +1{{$}} + } + + if(1 && some_func(1 && 1) || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-4]]:23: note: +1{{$}} + } + if(1 && some_func(1 || 1) || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-4]]:23: note: +1{{$}} + } + if(1 || some_func(1 || 1) || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } + if(1 || some_func(1 && 1) || 1) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:29: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:23: note: +1{{$}} + } +} + +void unittest_b1_09_02() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b1_09_02' has cognitive complexity of 12 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if(1 && SomeClass::func(1 && 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:29: note: +1{{$}} + } + if(1 && SomeClass::func(1 || 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:29: note: +1{{$}} + } + if(1 || SomeClass::func(1 || 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:29: note: +1{{$}} + } + if(1 || SomeClass::func(1 && 1)) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:8: note: +1{{$}} +// CHECK-NOTES: :[[@LINE-3]]:29: note: +1{{$}} + } +} + +// FIXME: each method in a recursion cycle + +//----------------------------------------------------------------------------// +//---------------------------- B2. Nesting lebel -----------------------------// +//----------------------------------------------------------------------------// +// Check that every thing listed in B2 of the specification does indeed // +// increase the nesting level // +//----------------------------------------------------------------------------// + +void unittest_b2_00() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_00' has cognitive complexity of 9 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } else if (true) { +// CHECK-NOTES: :[[@LINE-1]]:10: note: +1, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } else { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b2_01() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_01' has cognitive complexity of 5 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + int i = 1 ? (1 ? 1 : 0) : (1 ? 1 : 0); +// CHECK-NOTES: :[[@LINE-1]]:13: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +// CHECK-NOTES: :[[@LINE-2]]:18: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +// CHECK-NOTES: :[[@LINE-3]]:32: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} +} + +void unittest_b2_02(int x) { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_02' has cognitive complexity of 5 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + switch (x) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + case -1: + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + return; + default: + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + return; + } +} + +void unittest_b2_03() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_03' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + for (;;) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b2_04() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_04' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + SomeClass C; + for (int i : C) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b2_05() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_05' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + while (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b2_06() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_06' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + do { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } while (true); +} + +void unittest_b2_07() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_07' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if(true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b2_08_00() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_08_00' has cognitive complexity of 10 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + class X { + X() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + X &operator=(const X &other) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + ~X() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + void Y() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + static void Z() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + +// CHECK-NOTES: :[[@LINE-45]]:5: warning: function 'X' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-42]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-39]]:8: warning: function 'operator=' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-36]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-33]]:5: warning: function '~X' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-30]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-27]]:10: warning: function 'Y' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-24]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-21]]:17: warning: function 'Z' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-18]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + }; +} + +void unittest_b2_08_01() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_08_01' has cognitive complexity of 10 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + struct X { + X() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + X &operator=(const X &other) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + ~X() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + void Y() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + + static void Z() { +// CHECK-NOTES: :[[@LINE-1]]:5: note: nesting level increased to 1{{$}} + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } + +// CHECK-NOTES: :[[@LINE-45]]:5: warning: function 'X' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-42]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-39]]:8: warning: function 'operator=' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-36]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-33]]:5: warning: function '~X' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-30]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-27]]:10: warning: function 'Y' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-24]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + +// CHECK-NOTES: :[[@LINE-21]]:17: warning: function 'Z' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-18]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + }; +} + +void unittest_b2_08_02() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_08_02' has cognitive complexity of 2 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + auto fun = []() { +// CHECK-NOTES: :[[@LINE-1]]:14: note: nesting level increased to 1{{$}} + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + }; +// CHECK-NOTES: :[[@LINE-6]]:14: warning: function 'operator()' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-5]]:5: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} +} + +void unittest_b2_09() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_09' has cognitive complexity of 2 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + ({ +// CHECK-NOTES: :[[@LINE-1]]:3: note: nesting level increased to 1{{$}} + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + }); +} + +void unittest_b2_10() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b2_10' has cognitive complexity of 2 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + void (^foo)(void) = ^(void) { +// CHECK-NOTES: :[[@LINE-1]]:23: note: nesting level increased to 1{{$}} + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + }; +} + +//----------------------------------------------------------------------------// +//-------------------------- B3. Nesting increments --------------------------// +//----------------------------------------------------------------------------// +// Check that every thing listed in B3 of the specification does indeed // +// recieve the penalty of the current nesting level // +//----------------------------------------------------------------------------// + +void unittest_b3_00() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_00' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b3_01() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_01' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + int i = 1 ? 1 : 0; +// CHECK-NOTES: :[[@LINE-1]]:15: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } +} + +void unittest_b3_02(int x) { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_02' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + switch (x) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + case -1: + return; + default: + return; + } + } +} + +void unittest_b3_03() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_03' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + for (;;) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b3_04() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_04' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + SomeClass C; + for (int i : C) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b3_05() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_05' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + while (true) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +void unittest_b3_06() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_06' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + do { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } while (true); + } +} + +void unittest_b3_07() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'unittest_b3_07' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + if (true) { +// CHECK-NOTES: :[[@LINE-1]]:3: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +2, including nesting penalty of 1, nesting level increased to 2{{$}} + } + } +} + +//----------------------------------------------------------------------------// +// Check that functions are being checked // +//----------------------------------------------------------------------------// + +class CheckClass { + CheckClass(int x) { +// CHECK-NOTES: :[[@LINE-1]]:3: warning: function 'CheckClass' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void PrivateMemberFunction() { +// CHECK-NOTES: :[[@LINE-1]]:8: warning: function 'PrivateMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void PrivateConstMemberFunction() const { +// CHECK-NOTES: :[[@LINE-1]]:8: warning: function 'PrivateConstMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + static void PrivateStaticMemberFunction() { +// CHECK-NOTES: :[[@LINE-1]]:15: warning: function 'PrivateStaticMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + +public: + CheckClass() { +// CHECK-NOTES: :[[@LINE-1]]:3: warning: function 'CheckClass' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + operator bool() const { +// CHECK-NOTES: :[[@LINE-1]]:3: warning: function 'operator bool' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + ~CheckClass() { +// CHECK-NOTES: :[[@LINE-1]]:3: warning: function '~CheckClass' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void PublicMemberFunction() { +// CHECK-NOTES: :[[@LINE-1]]:8: warning: function 'PublicMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void PublicConstMemberFunction() const { +// CHECK-NOTES: :[[@LINE-1]]:8: warning: function 'PublicConstMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + static void PublicStaticMemberFunction() { +// CHECK-NOTES: :[[@LINE-1]]:15: warning: function 'PublicStaticMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void PublicFunctionDefinition(); + +protected: + CheckClass(bool b) { +// CHECK-NOTES: :[[@LINE-1]]:3: warning: function 'CheckClass' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void ProtectedMemberFunction() { +// CHECK-NOTES: :[[@LINE-1]]:8: warning: function 'ProtectedMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + void ProtectedConstMemberFunction() const { +// CHECK-NOTES: :[[@LINE-1]]:8: warning: function 'ProtectedConstMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } + + static void ProtectedStaticMemberFunction() { +// CHECK-NOTES: :[[@LINE-1]]:15: warning: function 'ProtectedStaticMemberFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:7: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } + } +}; + +void CheckClass::PublicFunctionDefinition() { +// CHECK-NOTES: :[[@LINE-1]]:18: warning: function 'PublicFunctionDefinition' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } +} + +#define uglyfunctionmacro(name) \ + void name() { \ + CC100; \ + \ + if (true) { \ + try { \ + } catch (...) { \ + } \ + } \ + } + +uglyfunctionmacro(MacroFunction) +// CHECK-NOTES: :[[@LINE-1]]:19: warning: function 'MacroFunction' has cognitive complexity of 3 (threshold 0) [readability-function-cognitive-complexity] +// CHECK-NOTES: :[[@LINE-2]]:1: note: +1, including nesting penalty of 0, nesting level increased to 1 +// CHECK-NOTES: :[[@LINE-10]]:5: note: expanded from macro 'uglyfunctionmacro' +// CHECK-NOTES: :[[@LINE-4]]:1: note: +2, including nesting penalty of 1, nesting level increased to 2 +// CHECK-NOTES: :[[@LINE-10]]:9: note: expanded from macro 'uglyfunctionmacro' + +template +void templatedFunction() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'templatedFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } +} + +template<> +void templatedFunction() { +// CHECK-NOTES: :[[@LINE-1]]:6: warning: function 'templatedFunction' has cognitive complexity of 1 (threshold 0) [readability-function-cognitive-complexity] + CC100; + + try { + } catch (...) { +// CHECK-NOTES: :[[@LINE-1]]:5: note: +1, including nesting penalty of 0, nesting level increased to 1{{$}} + } +} + +template void templatedFunction(); + +void functionThatCallsTemplatedFunctions() { + templatedFunction(); + + templatedFunction(); + + templatedFunction(); + + templatedFunction(); +} From 14f6bfcb52e77867a6a84fcfd9e21bb5f1f5795c Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Thu, 27 Feb 2020 15:57:44 -0800 Subject: [PATCH 427/544] [clang] Implement objc_non_runtime_protocol to remove protocol metadata Summary: Motivated by the new objc_direct attribute, this change adds a new attribute that remotes metadata from Protocols that the programmer knows isn't going to be used at runtime. We simply have the frontend skip generating any protocol metadata entries (e.g. OBJC_CLASS_NAME, _OBJC_$_PROTOCOL_INSTANCE_METHDOS, _OBJC_PROTOCOL, etc) for a protocol marked with `__attribute__((objc_non_runtime_protocol))`. There are a few APIs used to retrieve a protocol at runtime. `@protocol(SomeProtocol)` will now error out of the requested protocol is marked with attribute. `objc_getProtocol` will return `NULL` which is consistent with the behavior of a non-existing protocol. Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D75574 --- clang/include/clang/AST/DeclObjC.h | 8 + clang/include/clang/Basic/Attr.td | 7 + clang/include/clang/Basic/AttrDocs.td | 16 ++ .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/lib/AST/DeclObjC.cpp | 22 +++ clang/lib/CodeGen/CGObjC.cpp | 69 +++++++++ clang/lib/CodeGen/CGObjCGNU.cpp | 30 +++- clang/lib/CodeGen/CGObjCMac.cpp | 33 ++-- clang/lib/CodeGen/CGObjCRuntime.h | 11 ++ clang/lib/Sema/SemaDeclAttr.cpp | 8 + clang/lib/Sema/SemaExprObjC.cpp | 3 + clang/test/CodeGenObjC/non-runtime-protocol.m | 142 ++++++++++++++++++ ...a-attribute-supported-attributes-list.test | 1 + 13 files changed, 336 insertions(+), 16 deletions(-) create mode 100644 clang/test/CodeGenObjC/non-runtime-protocol.m diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h index 32e69d7fe1ed1..f2c25bceed185 100644 --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -2178,6 +2178,14 @@ class ObjCProtocolDecl : public ObjCContainerDecl, data().ReferencedProtocols.set(List, Num, Locs, C); } + /// This is true iff the protocol is tagged with the `objc_static_protocol` + /// attribute. + bool isNonRuntimeProtocol() const; + + /// Get the set of all protocols implied by this protocols inheritance + /// hierarchy. + void getImpliedProtocols(llvm::DenseSet &IPs) const; + ObjCProtocolDecl *lookupProtocolNamed(IdentifierInfo *PName); // Lookup a method. First, we search locally. If a method isn't diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4d7a65964887c..60e7a9d4303b0 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2024,6 +2024,13 @@ def ObjCDirectMembers : Attr { let Documentation = [ObjCDirectMembersDocs]; } +def ObjCNonRuntimeProtocol : Attr { + let Spellings = [Clang<"objc_non_runtime_protocol">]; + let Subjects = SubjectList<[ObjCProtocol], ErrorDiag>; + let LangOpts = [ObjC]; + let Documentation = [ObjCNonRuntimeProtocolDocs]; +} + def ObjCRuntimeName : Attr { let Spellings = [Clang<"objc_runtime_name">]; let Subjects = SubjectList<[ObjCInterface, ObjCProtocol], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 8c236796546c1..bf190829381c5 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4620,6 +4620,22 @@ properties, including auto-synthesized properties. }]; } +def ObjCNonRuntimeProtocolDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +The ``objc_non_runtime_protocol`` attribute can be used to mark that an +Objective-C protocol is only used during static type-checking and doesn't need +to be represented dynamically. This avoids several small code-size and run-time +overheads associated with handling the protocol's metadata. A non-runtime +protocol cannot be used as the operand of a ``@protocol`` expression, and +dynamic attempts to find it with ``objc_getProtocol`` will fail. + +If a non-runtime protocol inherits from any ordinary protocols, classes and +derived protocols that declare conformance to the non-runtime protocol will +dynamically list their conformance to those bare protocols. + }]; +} + def SelectAnyDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f29eec316971d..b3b3bc7238635 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1034,6 +1034,8 @@ def warn_objc_boxing_invalid_utf8_string : Warning< "string is ill-formed as UTF-8 and will become a null %0 when boxed">, InGroup; +def err_objc_non_runtime_protocol_in_protocol_expr : Error< + "cannot use a protocol declared 'objc_non_runtime_protocol' in a @protocol expression">; def err_objc_direct_on_protocol : Error< "'objc_direct' attribute cannot be applied to %select{methods|properties}0 " "declared in an Objective-C protocol">; diff --git a/clang/lib/AST/DeclObjC.cpp b/clang/lib/AST/DeclObjC.cpp index 78ef9a1c67c9e..b6f8227b157a1 100644 --- a/clang/lib/AST/DeclObjC.cpp +++ b/clang/lib/AST/DeclObjC.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include using namespace clang; @@ -1905,6 +1906,27 @@ ObjCProtocolDecl *ObjCProtocolDecl::CreateDeserialized(ASTContext &C, return Result; } +bool ObjCProtocolDecl::isNonRuntimeProtocol() const { + return hasAttr(); +} + +void ObjCProtocolDecl::getImpliedProtocols( + llvm::DenseSet &IPs) const { + std::queue WorkQueue; + WorkQueue.push(this); + + while (!WorkQueue.empty()) { + const auto *PD = WorkQueue.front(); + WorkQueue.pop(); + for (const auto *Parent : PD->protocols()) { + const auto *Can = Parent->getCanonicalDecl(); + auto Result = IPs.insert(Can); + if (Result.second) + WorkQueue.push(Parent); + } + } +} + ObjCProtocolDecl *ObjCProtocolDecl::lookupProtocolNamed(IdentifierInfo *Name) { ObjCProtocolDecl *PDecl = this; diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 99b896ae34886..f905e17e8ad2f 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -445,6 +445,75 @@ CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend( Method); } +static void AppendFirstImpliedRuntimeProtocols( + const ObjCProtocolDecl *PD, + llvm::UniqueVector &PDs) { + if (!PD->isNonRuntimeProtocol()) { + const auto *Can = PD->getCanonicalDecl(); + PDs.insert(Can); + return; + } + + for (const auto *ParentPD : PD->protocols()) + AppendFirstImpliedRuntimeProtocols(ParentPD, PDs); +} + +std::vector +CGObjCRuntime::GetRuntimeProtocolList(ObjCProtocolDecl::protocol_iterator begin, + ObjCProtocolDecl::protocol_iterator end) { + std::vector RuntimePds; + llvm::DenseSet NonRuntimePDs; + + for (; begin != end; ++begin) { + const auto *It = *begin; + const auto *Can = It->getCanonicalDecl(); + if (Can->isNonRuntimeProtocol()) + NonRuntimePDs.insert(Can); + else + RuntimePds.push_back(Can); + } + + // If there are no non-runtime protocols then we can just stop now. + if (NonRuntimePDs.empty()) + return RuntimePds; + + // Else we have to search through the non-runtime protocol's inheritancy + // hierarchy DAG stopping whenever a branch either finds a runtime protocol or + // a non-runtime protocol without any parents. These are the "first-implied" + // protocols from a non-runtime protocol. + llvm::UniqueVector FirstImpliedProtos; + for (const auto *PD : NonRuntimePDs) + AppendFirstImpliedRuntimeProtocols(PD, FirstImpliedProtos); + + // Walk the Runtime list to get all protocols implied via the inclusion of + // this protocol, e.g. all protocols it inherits from including itself. + llvm::DenseSet AllImpliedProtocols; + for (const auto *PD : RuntimePds) { + const auto *Can = PD->getCanonicalDecl(); + AllImpliedProtocols.insert(Can); + Can->getImpliedProtocols(AllImpliedProtocols); + } + + // Similar to above, walk the list of first-implied protocols to find the set + // all the protocols implied excluding the listed protocols themselves since + // they are not yet a part of the `RuntimePds` list. + for (const auto *PD : FirstImpliedProtos) { + PD->getImpliedProtocols(AllImpliedProtocols); + } + + // From the first-implied list we have to finish building the final protocol + // list. If a protocol in the first-implied list was already implied via some + // inheritance path through some other protocols then it would be redundant to + // add it here and so we skip over it. + for (const auto *PD : FirstImpliedProtos) { + if (!AllImpliedProtocols.contains(PD)) { + RuntimePds.push_back(PD); + } + } + + return RuntimePds; +} + /// Instead of '[[MyClass alloc] init]', try to generate /// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the /// caller side, as well as the optimized objc_alloc. diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index ed36e4a5cbc1a..c6500c0230c4d 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -1187,8 +1187,11 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { } llvm::Constant *GenerateCategoryProtocolList(const ObjCCategoryDecl *OCD) override { - SmallVector Protocols; - for (const auto *PI : OCD->getReferencedProtocols()) + const auto &ReferencedProtocols = OCD->getReferencedProtocols(); + auto RuntimeProtocols = GetRuntimeProtocolList(ReferencedProtocols.begin(), + ReferencedProtocols.end()); + SmallVector Protocols; + for (const auto *PI : RuntimeProtocols) Protocols.push_back( llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), ProtocolPtrTy)); @@ -1371,7 +1374,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { } SmallVector Protocols; - for (const auto *PI : PD->protocols()) + auto RuntimeProtocols = + GetRuntimeProtocolList(PD->protocol_begin(), PD->protocol_end()); + for (const auto *PI : RuntimeProtocols) Protocols.push_back( llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), ProtocolPtrTy)); @@ -1910,8 +1915,10 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // struct objc_class *sibling_class classFields.addNullPointer(PtrTy); // struct objc_protocol_list *protocols; - SmallVector Protocols; - for (const auto *I : classDecl->protocols()) + auto RuntimeProtocols = GetRuntimeProtocolList(classDecl->protocol_begin(), + classDecl->protocol_end()); + SmallVector Protocols; + for (const auto *I : RuntimeProtocols) Protocols.push_back( llvm::ConstantExpr::getBitCast(GenerateProtocolRef(I), ProtocolPtrTy)); @@ -3076,6 +3083,9 @@ CGObjCGNU::GenerateEmptyProtocol(StringRef ProtocolName) { } void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { + if (PD->isNonRuntimeProtocol()) + return; + std::string ProtocolName = PD->getNameAsString(); // Use the protocol definition, if there is one. @@ -3228,8 +3238,11 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef bits) { llvm::Constant *CGObjCGNU::GenerateCategoryProtocolList(const ObjCCategoryDecl *OCD) { + const auto &RefPro = OCD->getReferencedProtocols(); + const auto RuntimeProtos = + GetRuntimeProtocolList(RefPro.begin(), RefPro.end()); SmallVector Protocols; - for (const auto *PD : OCD->getReferencedProtocols()) + for (const auto *PD : RuntimeProtos) Protocols.push_back(PD->getNameAsString()); return GenerateProtocolList(Protocols); } @@ -3515,8 +3528,11 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { llvm::Constant *Properties = GeneratePropertyList(OID, ClassDecl); // Collect the names of referenced protocols + auto RefProtocols = ClassDecl->protocols(); + auto RuntimeProtocols = + GetRuntimeProtocolList(RefProtocols.begin(), RefProtocols.end()); SmallVector Protocols; - for (const auto *I : ClassDecl->protocols()) + for (const auto *I : RuntimeProtocols) Protocols.push_back(I->getNameAsString()); // Get the superclass pointer. diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index aa50d2173a7de..dff86744698d4 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/UniqueVector.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" @@ -3196,7 +3197,8 @@ CGObjCMac::EmitProtocolList(Twine name, ObjCProtocolDecl::protocol_iterator begin, ObjCProtocolDecl::protocol_iterator end) { // Just return null for empty protocol lists - if (begin == end) + auto PDs = GetRuntimeProtocolList(begin, end); + if (PDs.empty()) return llvm::Constant::getNullValue(ObjCTypes.ProtocolListPtrTy); ConstantInitBuilder builder(CGM); @@ -3209,9 +3211,9 @@ CGObjCMac::EmitProtocolList(Twine name, auto countSlot = values.addPlaceholder(); auto refsArray = values.beginArray(ObjCTypes.ProtocolPtrTy); - for (; begin != end; ++begin) { - refsArray.add(GetProtocolRef(*begin)); - } + for (const auto *Proto : PDs) + refsArray.add(GetProtocolRef(Proto)); + auto count = refsArray.size(); // This list is null terminated. @@ -6648,7 +6650,8 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, // This routine is called for @protocol only. So, we must build definition // of protocol's meta-data (not a reference to it!) - // + assert(!PD->isNonRuntimeProtocol() && + "attempting to get a protocol ref to a static protocol."); llvm::Constant *Init = llvm::ConstantExpr::getBitCast(GetOrEmitProtocol(PD), ObjCTypes.getExternalProtocolPtrTy()); @@ -7005,6 +7008,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( const ObjCProtocolDecl *PD) { llvm::GlobalVariable *&Entry = Protocols[PD->getIdentifier()]; + assert(!PD->isNonRuntimeProtocol() && + "attempting to GetOrEmit a non-runtime protocol"); if (!Entry) { // We use the initializer as a marker of whether this is a forward // reference or not. At module finalization we add the empty @@ -7148,10 +7153,20 @@ llvm::Constant * CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, ObjCProtocolDecl::protocol_iterator begin, ObjCProtocolDecl::protocol_iterator end) { + // Just return null for empty protocol lists + auto Protocols = GetRuntimeProtocolList(begin, end); + if (Protocols.empty()) + return llvm::Constant::getNullValue(ObjCTypes.ProtocolListnfABIPtrTy); + SmallVector ProtocolRefs; + ProtocolRefs.reserve(Protocols.size()); - // Just return null for empty protocol lists - if (begin == end) + for (const auto *PD : Protocols) + ProtocolRefs.push_back(GetProtocolRef(PD)); + + // If all of the protocols in the protocol list are objc_non_runtime_protocol + // just return null + if (ProtocolRefs.size() == 0) return llvm::Constant::getNullValue(ObjCTypes.ProtocolListnfABIPtrTy); // FIXME: We shouldn't need to do this lookup here, should we? @@ -7168,8 +7183,8 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, // A null-terminated array of protocols. auto array = values.beginArray(ObjCTypes.ProtocolnfABIPtrTy); - for (; begin != end; ++begin) - array.add(GetProtocolRef(*begin)); // Implemented??? + for (auto const &proto : ProtocolRefs) + array.add(proto); auto count = array.size(); array.addNullPointer(ObjCTypes.ProtocolnfABIPtrTy); diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h index 60f98389067e1..f56101df77b6c 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.h +++ b/clang/lib/CodeGen/CGObjCRuntime.h @@ -20,6 +20,7 @@ #include "CGValue.h" #include "clang/AST/DeclObjC.h" #include "clang/Basic/IdentifierTable.h" // Selector +#include "llvm/ADT/UniqueVector.h" namespace llvm { class Constant; @@ -205,6 +206,16 @@ class CGObjCRuntime { const CallArgList &CallArgs, const ObjCMethodDecl *Method = nullptr) = 0; + /// Walk the list of protocol references from a class, category or + /// protocol to traverse the DAG formed from it's inheritance hierarchy. Find + /// the list of protocols that ends each walk at either a runtime + /// protocol or a non-runtime protocol with no parents. For the common case of + /// just a list of standard runtime protocols this just returns the same list + /// that was passed in. + std::vector + GetRuntimeProtocolList(ObjCProtocolDecl::protocol_iterator begin, + ObjCProtocolDecl::protocol_iterator end); + /// Emit the code to return the named protocol as an object, as in a /// \@protocol expression. virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 06a1205e39646..c07e5f792d147 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2617,6 +2617,11 @@ static void handleVisibilityAttr(Sema &S, Decl *D, const ParsedAttr &AL, D->addAttr(newAttr); } +static void handleObjCNonRuntimeProtocolAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + handleSimpleAttribute(S, D, AL); +} + static void handleObjCDirectAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // objc_direct cannot be set on methods declared in the context of a protocol if (isa(D->getDeclContext())) { @@ -7665,6 +7670,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_ObjCDirect: handleObjCDirectAttr(S, D, AL); break; + case ParsedAttr::AT_ObjCNonRuntimeProtocol: + handleObjCNonRuntimeProtocolAttr(S, D, AL); + break; case ParsedAttr::AT_ObjCDirectMembers: handleObjCDirectMembersAttr(S, D, AL); handleSimpleAttribute(S, D, AL); diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index 2c088c8b15a3f..d161509aef240 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -1394,6 +1394,9 @@ ExprResult Sema::ParseObjCProtocolExpression(IdentifierInfo *ProtocolId, Diag(ProtoLoc, diag::err_undeclared_protocol) << ProtocolId; return true; } + if (PDecl->isNonRuntimeProtocol()) + Diag(ProtoLoc, diag::err_objc_non_runtime_protocol_in_protocol_expr) + << PDecl; if (!PDecl->hasDefinition()) { Diag(ProtoLoc, diag::err_atprotocol_protocol) << PDecl; Diag(PDecl->getLocation(), diag::note_entity_declared_at) << PDecl; diff --git a/clang/test/CodeGenObjC/non-runtime-protocol.m b/clang/test/CodeGenObjC/non-runtime-protocol.m new file mode 100644 index 0000000000000..4390fa7973e3a --- /dev/null +++ b/clang/test/CodeGenObjC/non-runtime-protocol.m @@ -0,0 +1,142 @@ +// RUN: not %clang_cc1 -emit-llvm -fobjc-arc -triple x86_64-apple-darwin10 %s -DPROTOEXPR -o - 2>&1 \ +// RUN: | FileCheck -check-prefix=PROTOEXPR %s + +// RUN: %clang_cc1 -emit-llvm -fobjc-arc -triple x86_64-apple-darwin10 %s -DREDUNDANCY -o - \ +// RUN: | FileCheck -check-prefix=REDUNDANCY1 %s +// RUN: %clang_cc1 -emit-llvm -fobjc-arc -triple x86_64-apple-darwin10 %s -DREDUNDANCY -o - \ +// RUN: | FileCheck -check-prefix=REDUNDANCY2 %s + +// RUN: %clang_cc1 -emit-llvm -fobjc-arc -triple x86_64-apple-darwin10 %s -DBASE -o - \ +// RUN: | FileCheck -check-prefix=NONFRAGILE %s +// RUN: %clang_cc1 -emit-llvm -fobjc-arc -triple x86_64-apple-darwin10 %s -DINHERITANCE -o - \ +// RUN: | FileCheck -check-prefix=INHERITANCE %s + +// RUN: %clang_cc1 -emit-llvm -triple x86_64-apple-darwin -fobjc-runtime=macosx-fragile-10.5 %s -DBASE -o - \ +// RUN: | FileCheck -check-prefix=FRAGILE %s +// RUN: %clang_cc1 -emit-llvm -triple x86_64-apple-darwin -fobjc-runtime=macosx-fragile-10.5 %s -DINHERITANCE -o - \ +// RUN: | FileCheck -check-prefix=FRAGILEINHERITANCE %s + +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu -fobjc-runtime=gnustep %s -DBASE -o - \ +// RUN: | FileCheck -check-prefix=GNU %s +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu -fobjc-runtime=gnustep %s -DINHERITANCE -o - \ +// RUN: | FileCheck -check-prefix=GNUINHERITANCE %s +// +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu -fobjc-runtime=gnustep-2 %s -DBASE -o - \ +// RUN: | FileCheck -check-prefix=GNU2 %s +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu -fobjc-runtime=gnustep-2 %s -DINHERITANCE -o - \ +// RUN: | FileCheck -check-prefix=GNU2INHERITANCE %s + +__attribute__((objc_root_class)) +@interface Root +@end +@implementation Root +@end + +#ifdef REDUNDANCY +// REDUNDANCY1-NOT: _OBJC_CLASS_PROTOCOLS_$_Implementer{{.*}}_OBJC_PROTOCOL_$_B +// REDUNDANCY2: _OBJC_CLASS_PROTOCOLS_$_Implementer{{.*}}_OBJC_PROTOCOL_$_C{{.*}}_OBJC_PROTOCOL_$_A +@protocol C +@end +@protocol B +@end +@protocol A +@end +__attribute__((objc_non_runtime_protocol)) @protocol Alpha +@end +__attribute__((objc_non_runtime_protocol)) @protocol Beta +@end +@interface Implementer : Root +@end +@implementation Implementer +@end +#endif + +#ifdef BASE +// Confirm that we're not emitting protocol information for the +// NONFRAGILE-NOT: OBJC_CLASS_NAME{{.*}}NonRuntimeProtocol +// NONFRAGILE-NOT: _OBJC_$_PROTOCOL_INSTANCE_METHODS_NonRuntimeProtocol +// NONFRAGILE-NOT: _OBJC_$_PROTOCOL_CLASS_METHODS_NonRuntimeProtocol +// NONFRAGILE-NOT: _OBJC_PROTOCOL_$_NonRuntimeProtocol +// NONFRAGILE-NOT: _OBJC_LABEL_PROTOCOL_$_NonRuntimeProtocol +// NONFRAGILE-NOT: _OBJC_CLASS_PROTOCOLS_$_NonRuntimeImplementer +// FRAGILE-NOT: OBJC_CLASS_NAME_.{{.*}}"Runtime\00" +// FRAGILE-NOT: OBJC_PROTOCOL_NonRuntime +// FRAGILE_NOT: OBJC_PROTOCOLS_NonRuntimeImplementer +// GNU-NOT: private unnamed_addr constant {{.*}} c"NonRuntimeProtocol\00" +// GNU-NOT: @.objc_protocol {{.*}} +// GNU2-NOT: private unnamed_addr constant {{.*}} c"NonRuntimeProtocol\00" +// GNU2-NOT: @.objc_protocol {{.*}} +__attribute__((objc_non_runtime_protocol)) +@protocol NonRuntimeProtocol +- (void)doThing; ++ (void)doClassThing; +@end +// NONFRAGILE: @"_OBJC_METACLASS_RO_$_NonRuntimeImplementer" {{.*}} %struct._objc_protocol_list* null +// NONFRAGILE: @"_OBJC_CLASS_RO_$_NonRuntimeImplementer" {{.*}} %struct._objc_protocol_list* null +@interface NonRuntimeImplementer : Root +- (void)doThing; ++ (void)doClassThing; +@end + +@implementation NonRuntimeImplementer +- (void)doThing { +} ++ (void)doClassThing { +} +@end +#endif + +#ifdef PROTOEXPR +__attribute__((objc_non_runtime_protocol)) +@protocol NonRuntimeProtocol +@end +void use() { + // PROTOEXPR: cannot use a protocol declared 'objc_non_runtime_protocol' in a @protocol expression + Protocol *p = @protocol(NonRuntimeProtocol); +} +#endif + +#ifdef INHERITANCE +// Confirm that we only emit references to the non-runtime protocols and +// properly walk the DAG to find the right protocols. +// INHERITANCE: OBJC_PROTOCOL_$_R2{{.*}} +// INHERITANCE: OBJC_PROTOCOL_$_R3{{.*}} +// INHERITANCE: @"_OBJC_CLASS_PROTOCOLS_$_Implementer" {{.*}}_OBJC_PROTOCOL_$_R2{{.*}}_OBJC_PROTOCOL_$_R3 + +// FRAGILEINHERITANCE: OBJC_PROTOCOL_R2 +// FRAGILEINHERITANCE: OBJC_PROTOCOL_R3 +// FRAGILEINHERITANCE: OBJC_CLASS_PROTOCOLS_Implementer{{.*}}OBJC_PROTOCOL_R2{{.*}}OBJC_PROTOCOL_R3 + +// GNUINHERITANCE-DAG: @[[Proto1:[0-9]]]{{.*}}c"R1\00" +// GNUINHERITANCE-DAG: [[P1Name:@.objc_protocol.[0-9]*]]{{.*}}@[[Proto1]] +// GNUINHERITANCE-DAG: @[[Proto2:[0-9]]]{{.*}}c"R2\00" +// GNUINHERITANCE-DAG: [[P2Name:@.objc_protocol.[0-9]+]]{{.*}}@[[Proto2]] +// GNUINHERITANCE-DAG: @[[Proto3:[0-9]]]{{.*}}c"R3\00" +// GNUINHERITANCE-DAG: [[P3Name:@.objc_protocol.[0-9]+]]{{.*}}@[[Proto3]] +// GNUINHERITANCE-DAG: @.objc_protocol_list{{.*}} +// GNUINHERITANCE: @.objc_protocol_list{{.*}}[[Proto3]]{{.*}}[[Proto2]] + +// GNU2INHERITANCE-DAG: @[[Proto1:[0-9]]]{{.*}}c"R1\00" +// GNU2INHERITANCE-DAG: _OBJC_PROTOCOL_R1{{.*}}@[[Proto1]] +// GNU2INHERITANCE-DAG: @[[Proto2:[0-9]]]{{.*}}c"R2\00" +// GNU2INHERITANCE-DAG: _OBJC_PROTOCOL_R2{{.*}}@[[Proto2]] +// GNU2INHERITANCE-DAG: @[[Proto3:[0-9]]]{{.*}}c"R3\00" +// GNU2INHERITANCE-DAG: _OBJC_PROTOCOL_R3{{.*}}@[[Proto3]] +// GNU2INHERITANCE: @.objc_protocol_list{{.*}}_OBJC_PROTOCOL_R2{{.*}}_OBJC_PROTOCOL_R3 +@protocol R1 +@end +@protocol R2 +@end +@protocol R3 +@end +__attribute__((objc_non_runtime_protocol)) @protocol N3 +@end +__attribute__((objc_non_runtime_protocol)) @protocol N1 +@end +__attribute__((objc_non_runtime_protocol)) @protocol N2 +@end +@interface Implementer : Root +@end +@implementation Implementer +@end +#endif diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index d385b858986cf..f67e2eee5818c 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -118,6 +118,7 @@ // CHECK-NEXT: ObjCExternallyRetained (SubjectMatchRule_variable_not_is_parameter, SubjectMatchRule_function, SubjectMatchRule_block, SubjectMatchRule_objc_method) // CHECK-NEXT: ObjCMethodFamily (SubjectMatchRule_objc_method) // CHECK-NEXT: ObjCNonLazyClass (SubjectMatchRule_objc_interface, SubjectMatchRule_objc_implementation) +// CHECK-NEXT: ObjCNonRuntimeProtocol (SubjectMatchRule_objc_protocol) // CHECK-NEXT: ObjCPreciseLifetime (SubjectMatchRule_variable) // CHECK-NEXT: ObjCRequiresPropertyDefs (SubjectMatchRule_objc_interface) // CHECK-NEXT: ObjCRequiresSuper (SubjectMatchRule_objc_method) From aff896dea13fed04d79bbd1ce20d5e49fec720bc Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 29 Sep 2020 14:55:46 -0700 Subject: [PATCH 428/544] [NFC][MSAN] Extract llvm.abs handling into a function Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D88519 --- .../Instrumentation/MemorySanitizer.cpp | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index cd54b6c2cd8f6..df2144d206d5a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2638,12 +2638,6 @@ struct MemorySanitizerVisitor : public InstVisitor { return false; unsigned NumArgOperands = I.getNumArgOperands(); - if (I.getIntrinsicID() == Intrinsic::abs) { - assert(NumArgOperands == 2); - // The last argument is just a boolean flag. - NumArgOperands = 1; - } - for (unsigned i = 0; i < NumArgOperands; ++i) { Type *Ty = I.getArgOperand(i)->getType(); if (Ty != RetTy) @@ -3236,8 +3230,24 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + // Instrument abs intrinsic. + // handleUnknownIntrinsic can't handle it because of the last + // is_int_min_poison argument which does not match the result type. + void handleAbsIntrinsic(IntrinsicInst &I) { + assert(I.getType()->isIntOrIntVectorTy()); + assert(I.getArgOperand(0)->getType() == I.getType()); + + // FIXME: Handle is_int_min_poison. + IRBuilder<> IRB(&I); + setShadow(&I, getShadow(&I, 0)); + setOrigin(&I, getOrigin(&I, 0)); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { + case Intrinsic::abs: + handleAbsIntrinsic(I); + break; case Intrinsic::lifetime_start: handleLifetimeStart(I); break; From 04fce1515b7ae2fcf7986d8578c18cfd559c68b2 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 2 Oct 2020 17:30:42 -0400 Subject: [PATCH 429/544] [libc++] Fix the build with GCC < 10 For now, we still need to support older GCCs, so work around the lack of __is_constructible on older GCCs. --- libcxx/include/type_traits | 8 ++++++-- .../meta.unary/meta.unary.prop/is_constructible.pass.cpp | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 03556389e2c6c..75d60cddd305c 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -2883,7 +2883,11 @@ namespace __is_construct struct __nat {}; } -#if !defined(_LIBCPP_CXX03_LANG) && !__has_feature(is_constructible) && !defined(_LIBCPP_COMPILER_GCC) +#if defined(_LIBCPP_COMPILER_GCC) && _GNUC_VER_NEW >= 10000 +# define _LIBCPP_GCC_SUPPORTS_IS_CONSTRUCTIBLE +#endif + +#if !defined(_LIBCPP_CXX03_LANG) && !__has_feature(is_constructible) && !defined(_LIBCPP_GCC_SUPPORTS_IS_CONSTRUCTIBLE) template struct __libcpp_is_constructible; @@ -2998,7 +3002,7 @@ struct __libcpp_is_constructible<_Tp&&, _A0> #endif -#if __has_feature(is_constructible) || defined(_LIBCPP_COMPILER_GCC) +#if __has_feature(is_constructible) || defined(_LIBCPP_GCC_SUPPORTS_IS_CONSTRUCTIBLE) template struct _LIBCPP_TEMPLATE_VIS is_constructible : public integral_constant diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp index e4fad7cd36c94..d8ee865d6de99 100644 --- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp @@ -11,6 +11,8 @@ // template // struct is_constructible; +// UNSUPPORTED: gcc-5, gcc-6, gcc-7, gcc-8, gcc-9 + #include #include "test_macros.h" From dc6a0b0ec7e3d72a4cc849af4e4aa6c6a29a53d2 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 25 Sep 2020 12:34:38 -0400 Subject: [PATCH 430/544] [HIP] Align device binary To facilitate faster loading of device binaries and share them among processes, HIP runtime favors their alignment being 4096 bytes. HIP runtime can load unaligned device binaries, however, aligning them at 4096 bytes results in faster loading and less shared memory usage. This patch adds an option -bundle-align to clang-offload-bundler which allows bundles to be aligned at specified alignment. By default it is 1, which is NFC compared to existing format. This patch then aligns embedded fat binary and device binary inside fat binary at 4096 bytes. It has been verified this change does not cause significant overall file size increase for typical HIP applications (less than 1%). Differential Revision: https://reviews.llvm.org/D88734 --- clang/lib/CodeGen/CGCUDANV.cpp | 6 ++++-- clang/lib/Driver/ToolChains/HIP.cpp | 7 ++++++- clang/test/CodeGenCUDA/device-stub.cu | 2 +- clang/test/Driver/clang-offload-bundler.c | 10 ++++++++++ clang/test/Driver/hip-toolchain-no-rdc.hip | 2 ++ clang/test/Driver/hip-toolchain-rdc.hip | 5 +++++ .../clang-offload-bundler/ClangOffloadBundler.cpp | 13 +++++++++++++ 7 files changed, 41 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index baf2c79cc2b66..b0b76ffbebdee 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -597,8 +597,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { if (CudaGpuBinary) { // If fatbin is available from early finalization, create a string // literal containing the fat binary loaded from the given file. - FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), - "", FatbinConstantName, 8); + const unsigned HIPCodeObjectAlign = 4096; + FatBinStr = + makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", + FatbinConstantName, HIPCodeObjectAlign); } else { // If fatbin is not available, create an external symbol // __hip_fatbin in section .hip_fatbin. The external symbol is supposed diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 78f53204bd8c1..25b3ab88bc02e 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -16,6 +16,7 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetParser.h" @@ -33,6 +34,7 @@ using namespace llvm::opt; #endif namespace { +const unsigned HIPCodeObjectAlign = 4096; static void addBCLib(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs, ArgStringList LibraryPaths, @@ -108,6 +110,8 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, // for different GPU archs. ArgStringList BundlerArgs; BundlerArgs.push_back(Args.MakeArgString("-type=o")); + BundlerArgs.push_back( + Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign))); // ToDo: Remove the dummy host binary entry which is required by // clang-offload-bundler. @@ -175,7 +179,8 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( ObjStream << " .section .hip_fatbin,\"aMS\",@progbits,1\n"; ObjStream << " .data\n"; ObjStream << " .globl __hip_fatbin\n"; - ObjStream << " .p2align 3\n"; + ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign)) + << "\n"; ObjStream << "__hip_fatbin:\n"; ObjStream << " .incbin \"" << BundleFile << "\"\n"; ObjStream.flush(); diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 0f4a5644fd48f..ca21116fc989e 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -115,7 +115,7 @@ void use_pointers() { // ALL: @4 = private unnamed_addr constant [21 x i8] c"ext_constant_var_def\00" // * constant unnamed string with GPU binary // CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00", -// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00", +// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",{{.*}}align 4096 // HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin" // CUDANORDC-SAME: section ".nv_fatbin", align 8 // CUDARDC-SAME: section "__nv_relfatbin", align 8 diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index a0724b3c60e81..21699e78dda6d 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -278,6 +278,16 @@ // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 +// +// Check -bundle-align option +// + +// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.bc,%t.tgt1,%t.tgt2 -outputs=%t.bundle3.bc +// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.bc,%t.res.tgt1,%t.res.tgt2 -inputs=%t.bundle3.bc -unbundle +// RUN: diff %t.bc %t.res.bc +// RUN: diff %t.tgt1 %t.res.tgt1 +// RUN: diff %t.tgt2 %t.res.tgt2 + // Some code so that we can create a binary out of this file. int A = 0; void test_func(void) { diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index c03c78a97c945..471c3022ecefa 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -81,6 +81,7 @@ // // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-bundle-align=4096" // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_A_803]],[[IMG_DEV_A_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]" @@ -143,6 +144,7 @@ // // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-bundle-align=4096" // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_B_803]],[[IMG_DEV_B_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index 97d5e59c0c4b1..8d8e675140357 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -8,10 +8,14 @@ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ // RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ +// RUN: -fhip-dump-offload-linker-script \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s +// check code object alignment in dumped llvm-mc input +// CHECK: .p2align 12 + // emit objects for host side path // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" @@ -87,6 +91,7 @@ // combine images generated into hip fat binary object // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-bundle-align=4096" // CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp index 3f9925d1e099d..e4a32d5e87441 100644 --- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -94,6 +94,11 @@ static cl::opt PrintExternalCommands( "instead of actually executing them - for testing purposes.\n"), cl::init(false), cl::cat(ClangOffloadBundlerCategory)); +static cl::opt + BundleAlignment("bundle-align", + cl::desc("Alignment of bundle for binary files"), + cl::init(1), cl::cat(ClangOffloadBundlerCategory)); + /// Magic string that marks the existence of offloading data. #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" @@ -223,6 +228,9 @@ class BinaryFileHandler final : public FileHandler { StringMap::iterator CurBundleInfo; StringMap::iterator NextBundleInfo; + /// Current bundle target to be written. + std::string CurWriteBundleTarget; + public: BinaryFileHandler() : FileHandler() {} @@ -337,10 +345,12 @@ class BinaryFileHandler final : public FileHandler { unsigned Idx = 0; for (auto &T : TargetNames) { MemoryBuffer &MB = *Inputs[Idx++]; + HeaderSize = alignTo(HeaderSize, BundleAlignment); // Bundle offset. Write8byteIntegerToBuffer(OS, HeaderSize); // Size of the bundle (adds to the next bundle's offset) Write8byteIntegerToBuffer(OS, MB.getBufferSize()); + BundlesInfo[T] = BundleInfo(MB.getBufferSize(), HeaderSize); HeaderSize += MB.getBufferSize(); // Size of the triple Write8byteIntegerToBuffer(OS, T.size()); @@ -351,6 +361,7 @@ class BinaryFileHandler final : public FileHandler { } Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final { + CurWriteBundleTarget = TargetTriple.str(); return Error::success(); } @@ -359,6 +370,8 @@ class BinaryFileHandler final : public FileHandler { } Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { + auto BI = BundlesInfo[CurWriteBundleTarget]; + OS.seek(BI.Offset); OS.write(Input.getBufferStart(), Input.getBufferSize()); return Error::success(); } From 8fb2a235b0f22dedba72b8b559ba33171a8dcd09 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Fri, 2 Oct 2020 13:34:46 -0700 Subject: [PATCH 431/544] Don't reject calls to MinGW's unusual _setjmp declaration. We now recognize this function as a builtin despite it having an unexpected number of parameters; make sure we don't enforce that it has only 1 argument for its 2 parameters. --- clang/include/clang/Basic/Builtins.def | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 6 ++- clang/lib/Sema/SemaChecking.cpp | 5 -- clang/test/Sema/builtin-setjmp.c | 63 +++++++++++++++++++------- 4 files changed, 52 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index d001b0bea9e6d..b2876ed6cbedc 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1028,6 +1028,7 @@ LIBBUILTIN(pthread_create, "", "fC<2,3>", "pthread.h", ALL_GNU_LANGUAGES) // POSIX setjmp.h +// FIXME: MinGW _setjmp has an additional void* parameter. LIBBUILTIN(_setjmp, "iJ", "fjT", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(__sigsetjmp, "iSJi", "fjT", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(sigsetjmp, "iSJi", "fjT", "setjmp.h", ALL_LANGUAGES) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index bb1c1d1aef338..e5f6ee138a21e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3764,11 +3764,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_abnormal_termination: return RValue::get(EmitSEHAbnormalTermination()); case Builtin::BI_setjmpex: - if (getTarget().getTriple().isOSMSVCRT()) + if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && + E->getArg(0)->getType()->isPointerType()) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); break; case Builtin::BI_setjmp: - if (getTarget().getTriple().isOSMSVCRT()) { + if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && + E->getArg(0)->getType()->isPointerType()) { if (getTarget().getTriple().getArch() == llvm::Triple::x86) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index eeb3222624005..951772a08d187 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1570,11 +1570,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (SemaBuiltinSetjmp(TheCall)) return ExprError(); break; - case Builtin::BI_setjmp: - case Builtin::BI_setjmpex: - if (checkArgCount(*this, TheCall, 1)) - return true; - break; case Builtin::BI__builtin_classify_type: if (checkArgCount(*this, TheCall, 1)) return true; TheCall->setType(Context.IntTy); diff --git a/clang/test/Sema/builtin-setjmp.c b/clang/test/Sema/builtin-setjmp.c index 6a114fad05d9d..604d534eb504a 100644 --- a/clang/test/Sema/builtin-setjmp.c +++ b/clang/test/Sema/builtin-setjmp.c @@ -1,34 +1,47 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_JMP_BUF %s -ast-dump | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DWRONG_JMP_BUF %s -ast-dump | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DRIGHT_JMP_BUF %s -ast-dump | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DONLY_JMP_BUF %s -ast-dump | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify -DNO_SETJMP %s -ast-dump 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=c,expected -DNO_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=c,expected -DWRONG_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=c,expected -DRIGHT_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=c,expected -DONLY_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=c,expected -DNO_SETJMP %s -ast-dump 2>&1 | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=cxx,expected -x c++ -DNO_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=cxx,expected -x c++ -DWRONG_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=cxx,expected -x c++ -DRIGHT_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=cxx,expected -x c++ -DONLY_JMP_BUF %s -ast-dump | FileCheck %s --check-prefixes=CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify=cxx,expected -x c++ -DNO_SETJMP %s -ast-dump | FileCheck %s --check-prefixes=CHECK2 + +#ifdef __cplusplus +extern "C" { +#endif #ifdef NO_JMP_BUF // This happens in some versions of glibc: the declaration of __sigsetjmp // precedes the declaration of sigjmp_buf. extern long setjmp(long *); // Can't check, so we trust that this is the right type // FIXME: We could still diagnose the missing `jmp_buf` at the point of the call. -// expected-no-diagnostics +// c-no-diagnostics #elif WRONG_JMP_BUF typedef long jmp_buf; -extern int setjmp(char); // expected-warning {{incompatible redeclaration of library function 'setjmp'}} - // expected-note@-1 {{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}} +// FIXME: Consider producing a similar warning in C++. +extern int setjmp(char); // c-warning {{incompatible redeclaration of library function 'setjmp'}} + // c-note@-1 {{'setjmp' is a builtin with type 'int (jmp_buf)' (aka 'int (long)')}} #elif RIGHT_JMP_BUF typedef long jmp_buf; extern int setjmp(long); // OK, right type. -// expected-no-diagnostics #elif ONLY_JMP_BUF typedef int *jmp_buf; #endif void use() { setjmp(0); - #ifdef NO_SETJMP - // expected-warning@-2 {{implicit declaration of function 'setjmp' is invalid in C99}} + #if NO_SETJMP + // cxx-error@-2 {{undeclared identifier 'setjmp'}} + // c-warning@-3 {{implicit declaration of function 'setjmp' is invalid in C99}} #elif ONLY_JMP_BUF - // expected-warning@-4 {{implicitly declaring library function 'setjmp' with type 'int (jmp_buf)' (aka 'int (int *)')}} - // expected-note@-5 {{include the header or explicitly provide a declaration for 'setjmp'}} + // cxx-error@-5 {{undeclared identifier 'setjmp'}} + // c-warning@-6 {{implicitly declaring library function 'setjmp' with type 'int (jmp_buf)' (aka 'int (int *)')}} + // c-note@-7 {{include the header or explicitly provide a declaration for 'setjmp'}} + #else + // cxx-no-diagnostics #endif #ifdef NO_SETJMP @@ -37,6 +50,24 @@ void use() { #endif } -// CHECK: FunctionDecl {{.*}} used setjmp -// CHECK: BuiltinAttr {{.*}} Implicit -// CHECK: ReturnsTwiceAttr {{.*}} Implicit +// CHECK1: FunctionDecl {{.*}} used setjmp +// CHECK1: BuiltinAttr {{.*}} Implicit +// CHECK1: ReturnsTwiceAttr {{.*}} Implicit + +// mingw declares _setjmp with an unusual signature. +int _setjmp(void *, void *); +#if !defined(NO_JMP_BUF) && !defined(NO_SETJMP) +// c-warning@-2 {{incompatible redeclaration of library function '_setjmp'}} +// c-note@-3 {{'_setjmp' is a builtin with type 'int (jmp_buf)'}} +#endif +void use_mingw() { + _setjmp(0, 0); +} + +// CHECK2: FunctionDecl {{.*}} used _setjmp +// CHECK2: BuiltinAttr {{.*}} Implicit +// CHECK2: ReturnsTwiceAttr {{.*}} Implicit + +#ifdef __cplusplus +} +#endif From b113fa451061244e68af13328df9df46619bd0c3 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 2 Oct 2020 15:39:15 -0700 Subject: [PATCH 432/544] [test][Coro][NewPM] Fix coro-elide.ll under NPM --- llvm/test/Transforms/Coroutines/coro-elide.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/Coroutines/coro-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide.ll index ae0e30ab50ef4..7863c9daf2228 100644 --- a/llvm/test/Transforms/Coroutines/coro-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide.ll @@ -1,6 +1,6 @@ ; Tests that the coro.destroy and coro.resume are devirtualized where possible, ; SCC pipeline restarts and inlines the direct calls. -; RUN: opt < %s -S -inline -coro-elide -dce | FileCheck %s +; RUN: opt < %s -S -inline -coro-elide -dce -enable-new-pm=0 | FileCheck %s ; RUN: opt < %s -S \ ; RUN: -passes='cgscc(repeat<2>(inline,function(coro-elide,dce)))' \ ; RUN: | FileCheck %s From 321986fe68298ded3259bd1ffefe8cbdd60dbf18 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 1 Oct 2020 11:49:45 -0700 Subject: [PATCH 433/544] [MetaRenamer][NewPM] Port metarenamer to NPM Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D88690 --- .../llvm/Transforms/Utils/MetaRenamer.h | 26 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/Utils/MetaRenamer.cpp | 235 ++++++++++-------- llvm/test/Transforms/MetaRenamer/main.ll | 1 + .../Transforms/MetaRenamer/metarenamer.ll | 1 + 6 files changed, 155 insertions(+), 110 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/MetaRenamer.h diff --git a/llvm/include/llvm/Transforms/Utils/MetaRenamer.h b/llvm/include/llvm/Transforms/Utils/MetaRenamer.h new file mode 100644 index 0000000000000..fff3dff75837e --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/MetaRenamer.h @@ -0,0 +1,26 @@ +//===- MetaRenamer.h - Rename everything with metasyntatic names ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_METARENAMER_H +#define LLVM_TRANSFORMS_UTILS_METARENAMER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct MetaRenamerPass : PassInfoMixin { + PreservedAnalyses run(Module &, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_METARENAMER_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index af87f5e23a537..38fe128d7c1e6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -203,6 +203,7 @@ #include "llvm/Transforms/Utils/LowerInvoke.h" #include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index be0ab2cc398ed..867a3ec634a9e 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -68,6 +68,7 @@ MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("ipsccp", IPSCCPPass()) MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) +MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("mergefunc", MergeFunctionsPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 7f961dbaf4b47..e350320e75697 100644 --- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -25,6 +26,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/InitializePasses.h" @@ -40,123 +42,125 @@ static const char *const metaNames[] = { }; namespace { +// This PRNG is from the ISO C spec. It is intentionally simple and +// unsuitable for cryptographic use. We're just looking for enough +// variety to surprise and delight users. +struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { next = seed; } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } +}; - // This PRNG is from the ISO C spec. It is intentionally simple and - // unsuitable for cryptographic use. We're just looking for enough - // variety to surprise and delight users. - struct PRNG { - unsigned long next; +struct Renamer { + Renamer(unsigned int seed) { prng.srand(seed); } - void srand(unsigned int seed) { - next = seed; - } + const char *newName() { + return metaNames[prng.rand() % array_lengthof(metaNames)]; + } - int rand() { - next = next * 1103515245 + 12345; - return (unsigned int)(next / 65536) % 32768; - } - }; + PRNG prng; +}; - struct Renamer { - Renamer(unsigned int seed) { - prng.srand(seed); - } +void MetaRename(Function &F) { + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); - const char *newName() { - return metaNames[prng.rand() % array_lengthof(metaNames)]; - } + for (auto &BB : F) { + BB.setName("bb"); - PRNG prng; - }; + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); + } +} - struct MetaRenamer : public ModulePass { - // Pass identification, replacement for typeid - static char ID; - - MetaRenamer() : ModulePass(ID) { - initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.setPreservesAll(); - } - - bool runOnModule(Module &M) override { - // Seed our PRNG with simple additive sum of ModuleID. We're looking to - // simply avoid always having the same function names, and we need to - // remain deterministic. - unsigned int randSeed = 0; - for (auto C : M.getModuleIdentifier()) - randSeed += C; - - Renamer renamer(randSeed); - - // Rename all aliases - for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { - StringRef Name = AI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - AI->setName("alias"); - } - - // Rename all global variables - for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { - StringRef Name = GI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - GI->setName("global"); - } - - // Rename all struct types - TypeFinder StructTypes; - StructTypes.run(M, true); - for (StructType *STy : StructTypes) { - if (STy->isLiteral() || STy->getName().empty()) continue; - - SmallString<128> NameStorage; - STy->setName((Twine("struct.") + - renamer.newName()).toStringRef(NameStorage)); - } - - // Rename all functions - for (auto &F : M) { - StringRef Name = F.getName(); - LibFunc Tmp; - // Leave library functions alone because their presence or absence could - // affect the behavior of other passes. - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - getAnalysis().getTLI(F).getLibFunc( - F, Tmp)) - continue; - - // Leave @main alone. The output of -metarenamer might be passed to - // lli for execution and the latter needs a main entry point. - if (Name != "main") - F.setName(renamer.newName()); - - runOnFunction(F); - } - return true; - } - - bool runOnFunction(Function &F) { - for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->getType()->isVoidTy()) - AI->setName("arg"); - - for (auto &BB : F) { - BB.setName("bb"); - - for (auto &I : BB) - if (!I.getType()->isVoidTy()) - I.setName("tmp"); - } - return true; - } - }; +void MetaRename(Module &M, + function_ref GetTLI) { + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (auto C : M.getModuleIdentifier()) + randSeed += C; + + Renamer renamer(randSeed); + + // Rename all aliases + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + AI->setName("alias"); + } + + // Rename all global variables + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + GI->setName("global"); + } + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (StructType *STy : StructTypes) { + if (STy->isLiteral() || STy->getName().empty()) + continue; + + SmallString<128> NameStorage; + STy->setName( + (Twine("struct.") + renamer.newName()).toStringRef(NameStorage)); + } + + // Rename all functions + for (auto &F : M) { + StringRef Name = F.getName(); + LibFunc Tmp; + // Leave library functions alone because their presence or absence could + // affect the behavior of other passes. + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + GetTLI(F).getLibFunc(F, Tmp)) + continue; + + // Leave @main alone. The output of -metarenamer might be passed to + // lli for execution and the latter needs a main entry point. + if (Name != "main") + F.setName(renamer.newName()); + + MetaRename(F); + } +} + +struct MetaRenamer : public ModulePass { + // Pass identification, replacement for typeid + static char ID; + + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + MetaRename(M, GetTLI); + return true; + } +}; } // end anonymous namespace @@ -175,3 +179,14 @@ INITIALIZE_PASS_END(MetaRenamer, "metarenamer", ModulePass *llvm::createMetaRenamerPass() { return new MetaRenamer(); } + +PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + MetaRename(M, GetTLI); + + return PreservedAnalyses::all(); +} diff --git a/llvm/test/Transforms/MetaRenamer/main.ll b/llvm/test/Transforms/MetaRenamer/main.ll index f11d70f2b1a7f..83b31044f434f 100644 --- a/llvm/test/Transforms/MetaRenamer/main.ll +++ b/llvm/test/Transforms/MetaRenamer/main.ll @@ -1,5 +1,6 @@ ; Make sure @main is left untouched. ; RUN: opt -metarenamer -S %s | FileCheck %s +; RUN: opt -passes=metarenamer -S %s | FileCheck %s ; CHECK: define void @main ; CHECK: call void @main diff --git a/llvm/test/Transforms/MetaRenamer/metarenamer.ll b/llvm/test/Transforms/MetaRenamer/metarenamer.ll index 9cc7eb2a73fb7..19e49b1892e8f 100644 --- a/llvm/test/Transforms/MetaRenamer/metarenamer.ll +++ b/llvm/test/Transforms/MetaRenamer/metarenamer.ll @@ -1,4 +1,5 @@ ; RUN: opt -metarenamer -S < %s | FileCheck %s +; RUN: opt -passes=metarenamer -S < %s | FileCheck %s ; CHECK: target triple {{.*}} ; CHECK-NOT: {{^x*}}xxx{{^x*}} From 3847986fd2c838026b9d883bb61f2e419988c1a5 Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Fri, 2 Oct 2020 22:58:30 +0000 Subject: [PATCH 434/544] Fix the test case from D88686 It seems that one buildnot RSS value is much higher after munmap than local run. --- compiler-rt/test/dfsan/munmap_release_shadow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/dfsan/munmap_release_shadow.c b/compiler-rt/test/dfsan/munmap_release_shadow.c index 085844dfa6927..98147dc695b93 100644 --- a/compiler-rt/test/dfsan/munmap_release_shadow.c +++ b/compiler-rt/test/dfsan/munmap_release_shadow.c @@ -41,7 +41,7 @@ int main(int argc, char **argv) { assert(after_mmap >= before + mmap_cost_kb); // OS does not release memory to the same level as the start of the program. // The assert checks the memory after munmap up to a delta. - const size_t delta = 5000; - assert(after_munmap + mmap_cost_kb <= after_mmap + delta); + const size_t delta = 50000; + assert(after_munmap + delta <= after_mmap); return 0; } From cbd420c5ed8568774ace3ad8385b2346288e152c Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 29 Sep 2020 20:16:32 -0400 Subject: [PATCH 435/544] [CUDA][HIP] Fix bound arch for offload action for fat binary Currently CUDA/HIP toolchain uses "unknown" as bound arch for offload action for fat binary. This causes -mcpu or -march with "unknown" added in HIPToolChain::TranslateArgs or CUDAToolChain::TranslateArgs. This causes issue for https://reviews.llvm.org/D88377 since HIP toolchain needs to check -mcpu in HIPToolChain::TranslateArgs. The bound arch of offload action for fat binary is not really used, therefore set it to CudaArch::UNUSED. Differential Revision: https://reviews.llvm.org/D88524 --- clang/include/clang/Basic/Cuda.h | 1 + clang/lib/Basic/Cuda.cpp | 1 + clang/lib/Basic/Targets/NVPTX.cpp | 1 + clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 2 ++ clang/lib/Driver/Driver.cpp | 2 +- clang/test/Driver/hip-phases.hip | 28 ++++++++++++++++-- .../test/Driver/hip-toolchain-device-only.hip | 29 +++++++++++++++++++ 7 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 clang/test/Driver/hip-toolchain-device-only.hip diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 19301e825bcfd..93394f31abdcf 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -37,6 +37,7 @@ const char *CudaVersionToString(CudaVersion V); CudaVersion CudaStringToVersion(const llvm::Twine &S); enum class CudaArch { + UNUSED, UNKNOWN, SM_20, SM_21, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 2abbe3e81e0a2..7de42c1b90e55 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -64,6 +64,7 @@ struct CudaArchToStringMap { { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" } CudaArchToStringMap arch_names[] = { // clang-format off + {CudaArch::UNUSED, "", ""}, SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi SM(30), SM(32), SM(35), SM(37), // Kepler SM(50), SM(52), SM(53), // Maxwell diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index ef61b8f78946c..aae89477e97d6 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -204,6 +204,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case CudaArch::GFX1031: case CudaArch::LAST: break; + case CudaArch::UNUSED: case CudaArch::UNKNOWN: assert(false && "No GPU arch when compiling CUDA device code."); return ""; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index d9ef6c2a10789..433256313c12c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -5034,6 +5034,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::GFX1012: case CudaArch::GFX1030: case CudaArch::GFX1031: + case CudaArch::UNUSED: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -5095,6 +5096,7 @@ static std::pair getSMsBlocksPerSM(CodeGenModule &CGM) { case CudaArch::GFX1012: case CudaArch::GFX1030: case CudaArch::GFX1031: + case CudaArch::UNUSED: case CudaArch::UNKNOWN: break; case CudaArch::LAST: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 9cc4b8212981b..96798b3d0adbb 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2517,7 +2517,7 @@ class OffloadingActionBuilder final { // If we have a fat binary, add it to the list. if (CudaFatBinary) { - AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN); + AddTopLevel(CudaFatBinary, CudaArch::UNUSED); CudaDeviceActions.clear(); CudaFatBinary = nullptr; return; diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index 7c2dc1384ccd4..241448dfbd541 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -219,6 +219,12 @@ // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image +// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, ) +// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin // DBIN-NOT: host // // Test single gpu architecture up to the assemble phase in device-only @@ -230,6 +236,8 @@ // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler // DASM-NOT: host // @@ -242,9 +250,19 @@ // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) -// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) -// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image +// DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image +// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, ) +// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin // DBIN2-NOT: host // // Test two gpu architectures up to the assemble phase in device-only @@ -257,9 +275,13 @@ // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) +// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) +// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler // DASM2-NOT: host // diff --git a/clang/test/Driver/hip-toolchain-device-only.hip b/clang/test/Driver/hip-toolchain-device-only.hip new file mode 100644 index 0000000000000..19afeca113ba8 --- /dev/null +++ b/clang/test/Driver/hip-toolchain-device-only.hip @@ -0,0 +1,29 @@ +// REQUIRES: clang-driver, amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --offload-arch=gfx803 --offload-arch=gfx900 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib -c \ +// RUN: %s 2>&1 | FileCheck -check-prefixes=CHECK,LINK %s + +// CHECK-NOT: error: + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-fcuda-is-device" +// CHECK-SAME: "-target-cpu" "gfx803" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip" + +// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-emit-obj" +// CHECK-SAME: "-fcuda-is-device" +// CHECK-SAME: "-target-cpu" "gfx900" +// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip" + +// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] + +// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_A_803]],[[IMG_DEV_A_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]" From c56bb45e839940fffabcbf928a3778cfffc31ddd Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Fri, 2 Oct 2020 16:18:15 -0700 Subject: [PATCH 436/544] [fuzzer] Remove unused variable `TempAutoDictionary` is never used. Maybe a leftover of a previous experiment? Differential Revision: https://reviews.llvm.org/D88763 --- compiler-rt/lib/fuzzer/FuzzerMutate.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index 37fd6100dac33..8cb9f86a0f732 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -129,9 +129,6 @@ class MutationDispatcher { // Dictionary provided by the user via -dict=DICT_FILE. Dictionary ManualDictionary; - // Temporary dictionary modified by the fuzzer itself, - // recreated periodically. - Dictionary TempAutoDictionary; // Persistent dictionary modified by the fuzzer, consists of // entries that led to successful discoveries in the past mutations. Dictionary PersistentAutoDictionary; From 2cd75f738ec6b150719a9468372d77b7064ff340 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 29 Sep 2020 20:23:03 -0400 Subject: [PATCH 437/544] Diagnose invalid target ID for AMDGPU toolchain for assembler AMDGPU toolchain currently only diagnose invalid target ID for OpenCL source compilation. Invalid target ID is not diagnosed for assembler. This patch fixes that. Differential Revision: https://reviews.llvm.org/D88377 --- clang/lib/Driver/ToolChains/AMDGPU.cpp | 14 +++--- clang/lib/Driver/ToolChains/AMDGPU.h | 7 ++- clang/lib/Driver/ToolChains/HIP.cpp | 4 +- clang/test/Driver/amdgpu-invalid-target-id.s | 45 ++++++++++++++++++++ 4 files changed, 55 insertions(+), 15 deletions(-) create mode 100644 clang/test/Driver/amdgpu-invalid-target-id.s diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 656de9dd9e1e2..c6087156642b2 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -426,6 +426,8 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, DAL->append(A); } + checkTargetID(*DAL); + if (!Args.getLastArgValue(options::OPT_x).equals("cl")) return DAL; @@ -518,8 +520,6 @@ void AMDGPUToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { - // Allow using target ID in -mcpu. - translateTargetID(DriverArgs, CC1Args); // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, @@ -536,21 +536,17 @@ AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); } -StringRef -AMDGPUToolChain::translateTargetID(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const { +void AMDGPUToolChain::checkTargetID( + const llvm::opt::ArgList &DriverArgs) const { StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); if (TargetID.empty()) - return StringRef(); + return; llvm::StringMap FeatureMap; auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); if (!OptionalGpuArch) { getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID; - return StringRef(); } - - return OptionalGpuArch.getValue(); } void ROCMToolChain::addClangTargetOptions( diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 0a91499c0cbf8..55ef6e01967ed 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -94,11 +94,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { bool shouldSkipArgument(const llvm::opt::Arg *Arg) const; protected: - /// Translate -mcpu option containing target ID to cc1 options. - /// Returns the GPU name. - StringRef translateTargetID(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const; + /// Check and diagnose invalid target ID specified by -mcpu. + void checkTargetID(const llvm::opt::ArgList &DriverArgs) const; + /// Get GPU arch from -mcpu without checking. StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const; }; diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 25b3ab88bc02e..07d72c073b4b6 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -240,8 +240,7 @@ void HIPToolChain::addClangTargetOptions( Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - // Allow using target ID in --offload-arch. - StringRef GpuArch = translateTargetID(DriverArgs, CC1Args); + StringRef GpuArch = getGPUArch(DriverArgs); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && @@ -353,6 +352,7 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, if (!BoundArch.empty()) { DAL->eraseArg(options::OPT_mcpu_EQ); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); + checkTargetID(*DAL); } return DAL; diff --git a/clang/test/Driver/amdgpu-invalid-target-id.s b/clang/test/Driver/amdgpu-invalid-target-id.s new file mode 100644 index 0000000000000..e20f09745d71c --- /dev/null +++ b/clang/test/Driver/amdgpu-invalid-target-id.s @@ -0,0 +1,45 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Invalid target ID: gfx908xnack + +// RUN: not %clang -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:xnack+:xnack+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target ID: gfx908:xnack+:xnack+ + +// RUN: not %clang -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:unknown+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target ID: gfx908:unknown+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:sram-ecc+:unknown+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target ID: gfx908:sram-ecc+:unknown+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900:sram-ecc+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target ID: gfx900:sram-ecc+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900:xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s + +// NOSIGN: error: Invalid target ID: gfx900:xnack + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900+xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s + +// NOCOLON: error: Invalid target ID: gfx900+xnack From 9a48411f35a122ac8755a8453887b919c77daf3f Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Fri, 2 Oct 2020 16:59:28 -0700 Subject: [PATCH 438/544] Revert "[Driver] Move detectLibcxxIncludePath to ToolChain" This reverts commit a594fd28e373cb7cd348cf01f6a90e055bf6cf6d which is failign on some bots. --- clang/include/clang/Driver/ToolChain.h | 3 --- clang/lib/Driver/ToolChain.cpp | 23 ----------------------- clang/lib/Driver/ToolChains/Fuchsia.cpp | 4 ++-- clang/lib/Driver/ToolChains/Gnu.cpp | 22 +++++++++++++++++++++- 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index db4c4a7302325..7495e08fe6e64 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -575,9 +575,6 @@ class ToolChain { // given compilation arguments. virtual UnwindLibType GetUnwindLibType(const llvm::opt::ArgList &Args) const; - // Detect the highest available version of libc++ in base path. - virtual std::string detectLibcxxIncludePath(StringRef Base) const; - /// AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set /// the include paths to use for the given C++ standard library type. virtual void diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8e98e32068808..8991216da6765 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -924,29 +924,6 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs, } } -std::string ToolChain::detectLibcxxIncludePath(StringRef Base) const { - std::error_code EC; - int MaxVersion = 0; - std::string MaxVersionString; - for (llvm::vfs::directory_iterator LI = getVFS().dir_begin(Base, EC), LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef VersionText = llvm::sys::path::filename(LI->path()); - int Version; - if (VersionText[0] == 'v' && - !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { - if (Version > MaxVersion) { - MaxVersion = Version; - MaxVersionString = std::string(VersionText); - } - } - } - if (!MaxVersion) - return ""; - SmallString<128> P(Base); - llvm::sys::path::append(P, MaxVersionString); - return std::string(P.str()); -} - void ToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { // Header search paths should be handled by each of the subclasses. diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index e5f23ee385559..781179be39a36 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -319,8 +319,8 @@ void Fuchsia::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: { SmallString<128> P(getDriver().Dir); - llvm::sys::path::append(P, "..", "include", "c++"); - addSystemInclude(DriverArgs, CC1Args, detectLibcxxIncludePath(P.str())); + llvm::sys::path::append(P, "..", "include", "c++", "v1"); + addSystemInclude(DriverArgs, CC1Args, P.str()); break; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 3778b6f297ed2..f3843685a522b 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2877,11 +2877,31 @@ void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, } } +static std::string DetectLibcxxIncludePath(llvm::vfs::FileSystem &vfs, + StringRef base) { + std::error_code EC; + int MaxVersion = 0; + std::string MaxVersionString; + for (llvm::vfs::directory_iterator LI = vfs.dir_begin(base, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef VersionText = llvm::sys::path::filename(LI->path()); + int Version; + if (VersionText[0] == 'v' && + !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { + if (Version > MaxVersion) { + MaxVersion = Version; + MaxVersionString = std::string(VersionText); + } + } + } + return MaxVersion ? (base + "/" + MaxVersionString).str() : ""; +} + void Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { auto AddIncludePath = [&](std::string Path) { - std::string IncludePath = detectLibcxxIncludePath(Path); + std::string IncludePath = DetectLibcxxIncludePath(getVFS(), Path); if (IncludePath.empty() || !getVFS().exists(IncludePath)) return false; addSystemInclude(DriverArgs, CC1Args, IncludePath); From c8e73920ee61951096909a1d3cbfe7f2fa5bf814 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 2 Oct 2020 23:59:59 +0000 Subject: [PATCH 439/544] [gn build] Port ace644030e6 --- .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index f2e6f301ea733..fa1e24d323128 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -20,6 +20,7 @@ static_library("readability") { "DeleteNullPointerCheck.cpp", "DeletedDefaultCheck.cpp", "ElseAfterReturnCheck.cpp", + "FunctionCognitiveComplexityCheck.cpp", "FunctionSizeCheck.cpp", "IdentifierNamingCheck.cpp", "ImplicitBoolConversionCheck.cpp", From 64f2855f15d8496be2275ea82f8996868b618b15 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Fri, 2 Oct 2020 14:16:27 -0700 Subject: [PATCH 440/544] [CMake] Don't use CMakePushCheckState When we call cmake_pop_check_state, we undo any changes to REQUIRED variables performed by HandleLLVMOptions which is undesirable. Rather use replacement which is what we've used prior to 8d26760a. Differential Revision: https://reviews.llvm.org/D88756 --- llvm/runtimes/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index ecf8ac45c9e7b..598daa4502e1d 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -98,9 +98,6 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) include(CheckLibraryExists) include(CheckCCompilerFlag) - include(CMakePushCheckState) - - cmake_push_check_state() # We don't have libc++ (yet)... set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdinc++ -nostdlib++") @@ -118,7 +115,8 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) include(HandleLLVMOptions) include(FindPythonInterp) - cmake_pop_check_state() + # Remove the -nostdlib++ option we've added earlier. + string(REPLACE "-nostdlib++" "" CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") # Use libtool instead of ar if you are both on an Apple host, and targeting Apple. if(CMAKE_HOST_APPLE AND APPLE) From 9ae95a0f8f1bc9bd9e8eb30a5a9444fbdca5cc29 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Fri, 2 Oct 2020 20:05:09 -0400 Subject: [PATCH 441/544] [Sparc] Remove cast that truncates immediate operands to 32 bits. Patch by: Mark Kettenis Test provided by Jessica Clarke. Differential Revision: https://reviews.llvm.org/D87210 --- llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 2 +- llvm/test/CodeGen/SPARC/inlineasm-v9.ll | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index 069e43c6f5445..7845a18b14c1f 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -351,7 +351,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_Immediate: - O << (int)MO.getImm(); + O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: MO.getMBB()->getSymbol()->print(O, MAI); diff --git a/llvm/test/CodeGen/SPARC/inlineasm-v9.ll b/llvm/test/CodeGen/SPARC/inlineasm-v9.ll index 53ab114dd8d57..1388c8655ace1 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm-v9.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm-v9.ll @@ -39,3 +39,12 @@ entry: tail call void asm sideeffect "faddq $0,$1,$2", "{f40},{f40},{f40}"(fp128 0xL0, fp128 0xL0, fp128 0xL0) ret void } + +;; Ensure that 64-bit immediates aren't truncated +; CHECK-LABEL: test_large_immediate +; CHECK: or %o0, %lo(4294967296), %o0 +define i64 @test_large_immediate(i64) { +entry: + %1 = tail call i64 asm "or $0, %lo($1), $0", "=r,i,r"(i64 4294967296, i64 %0) + ret i64 %1 +} From 88c9162c9d47ef43a505bc5301dc626f3cd4f437 Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Sat, 3 Oct 2020 00:21:58 +0000 Subject: [PATCH 442/544] Fix the test case in D88686 Adjusted when to check RSS. --- compiler-rt/test/dfsan/munmap_release_shadow.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/dfsan/munmap_release_shadow.c b/compiler-rt/test/dfsan/munmap_release_shadow.c index 98147dc695b93..03197dfb86413 100644 --- a/compiler-rt/test/dfsan/munmap_release_shadow.c +++ b/compiler-rt/test/dfsan/munmap_release_shadow.c @@ -32,8 +32,14 @@ int main(int argc, char **argv) { munmap(p, map_size); size_t after_munmap = get_rss_kb(); - fprintf(stderr, "RSS at start: %td, after mmap: %td, after mumap: %td\n", - before, after_mmap, after_munmap); + p = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + dfsan_set_label(label, &val, sizeof(val)); + memset(p, val, map_size); + size_t after_mmap2 = get_rss_kb(); + + fprintf(stderr, "RSS at start: %td, after mmap: %td, after mumap: %td, after mmap2: %td\n", + before, after_mmap, after_munmap, after_mmap2); // The memory after mmap increases 3 times of map_size because the overhead of // shadow memory is 2x. @@ -42,6 +48,7 @@ int main(int argc, char **argv) { // OS does not release memory to the same level as the start of the program. // The assert checks the memory after munmap up to a delta. const size_t delta = 50000; - assert(after_munmap + delta <= after_mmap); + assert(after_mmap2 <= after_mmap + delta); + return 0; } From 1c897e9d72979730f7555e77dd54fe892a461637 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 22 Sep 2020 18:27:03 -0700 Subject: [PATCH 443/544] [lsan] Share platform allocator settings between ASan and LSan This moves the platform-specific parameter logic from asan into lsan_common.h to lsan can share it. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D87795 --- compiler-rt/lib/asan/asan_allocator.h | 51 ++++++------------- compiler-rt/lib/lsan/lsan_allocator.h | 51 +++++++------------ compiler-rt/lib/lsan/lsan_common.h | 71 ++++++++++++++++++++------- 3 files changed, 87 insertions(+), 86 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index 612799f90964a..c5f6de123a622 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -15,13 +15,20 @@ #define ASAN_ALLOCATOR_H #include "asan_flags.h" -#include "asan_internal.h" #include "asan_interceptors.h" +#include "asan_internal.h" +#include "lsan/lsan_common.h" #include "sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_list.h" namespace __asan { +// These are defined in lsan_common.h because they are shared between the asan +// allocator and the standalone lsan allocator. +using __lsan::AllocatorSizeClassMap; +using __lsan::kAllocatorSize; +using __lsan::kAllocatorSpace; + enum AllocType { FROM_MALLOC = 1, // Memory block came from malloc, calloc, realloc, etc. FROM_NEW = 2, // Memory block came from operator new. @@ -97,9 +104,9 @@ AsanChunkView FindHeapChunkByAddress(uptr address); AsanChunkView FindHeapChunkByAllocBeg(uptr address); // List of AsanChunks with total size. -class AsanChunkFifoList: public IntrusiveList { +class AsanChunkFifoList : public IntrusiveList { public: - explicit AsanChunkFifoList(LinkerInitialized) { } + explicit AsanChunkFifoList(LinkerInitialized) {} AsanChunkFifoList() { clear(); } void Push(AsanChunk *n); void PushList(AsanChunkFifoList *q); @@ -109,6 +116,7 @@ class AsanChunkFifoList: public IntrusiveList { IntrusiveList::clear(); size_ = 0; } + private: uptr size_; }; @@ -118,39 +126,9 @@ struct AsanMapUnmapCallback { void OnUnmap(uptr p, uptr size) const; }; +using SizeClassMap = AllocatorSizeClassMap; + #if SANITIZER_CAN_USE_ALLOCATOR64 -# if SANITIZER_FUCHSIA -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -typedef DefaultSizeClassMap SizeClassMap; -# elif defined(__powerpc64__) -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x20000000000ULL; // 2T. -typedef DefaultSizeClassMap SizeClassMap; -# elif defined(__aarch64__) && SANITIZER_ANDROID -// Android needs to support 39, 42 and 48 bit VMA. -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x2000000000ULL; // 128G. -typedef VeryCompactSizeClassMap SizeClassMap; -# elif defined(__aarch64__) -// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA -// so no need to different values for different VMA. -const uptr kAllocatorSpace = 0x10000000000ULL; -const uptr kAllocatorSize = 0x10000000000ULL; // 3T. -typedef DefaultSizeClassMap SizeClassMap; -#elif defined(__sparc__) -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x20000000000ULL; // 2T. -typedef DefaultSizeClassMap SizeClassMap; -# elif SANITIZER_WINDOWS -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x8000000000ULL; // 500G -typedef DefaultSizeClassMap SizeClassMap; -# else -const uptr kAllocatorSpace = 0x600000000000ULL; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -typedef DefaultSizeClassMap SizeClassMap; -# endif template struct AP64 { // Allocator64 parameters. Deliberately using a short name. static const uptr kSpaceBeg = kAllocatorSpace; @@ -179,7 +157,7 @@ struct AP32 { static const uptr kFlags = 0; }; template -using PrimaryAllocatorASVT = SizeClassAllocator32 >; +using PrimaryAllocatorASVT = SizeClassAllocator32>; using PrimaryAllocator = PrimaryAllocatorASVT; #endif // SANITIZER_CAN_USE_ALLOCATOR64 @@ -195,6 +173,7 @@ struct AsanThreadLocalMallocStorage { uptr quarantine_cache[16]; AllocatorCache allocator_cache; void CommitBack(); + private: // These objects are allocated via mmap() and are zero-initialized. AsanThreadLocalMallocStorage() {} diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h index 17e13cd014ba4..7ee84e2cd5b95 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.h +++ b/compiler-rt/lib/lsan/lsan_allocator.h @@ -14,10 +14,10 @@ #ifndef LSAN_ALLOCATOR_H #define LSAN_ALLOCATOR_H +#include "lsan_common.h" #include "sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_internal_defs.h" -#include "lsan_common.h" namespace __lsan { @@ -28,7 +28,7 @@ void *Reallocate(const StackTrace &stack, void *p, uptr new_size, uptr alignment); uptr GetMallocUsableSize(const void *p); -template +template void ForEachChunk(const Callable &callback); void GetAllocatorCacheRange(uptr *begin, uptr *end); @@ -49,51 +49,36 @@ struct ChunkMetadata { u32 stack_trace_id; }; -#if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) +#if SANITIZER_CAN_USE_ALLOCATOR64 template -struct AP32 { - static const uptr kSpaceBeg = 0; - static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; +struct AP64 { // Allocator64 parameters. Deliberately using a short name. + static const uptr kSpaceBeg = kAllocatorSpace; + static const uptr kSpaceSize = kAllocatorSize; static const uptr kMetadataSize = sizeof(ChunkMetadata); - typedef __sanitizer::CompactSizeClassMap SizeClassMap; - static const uptr kRegionSizeLog = 20; - using AddressSpaceView = AddressSpaceViewTy; + typedef AllocatorSizeClassMap SizeClassMap; typedef NoOpMapUnmapCallback MapUnmapCallback; static const uptr kFlags = 0; + using AddressSpaceView = AddressSpaceViewTy; }; template -using PrimaryAllocatorASVT = SizeClassAllocator32>; +using PrimaryAllocatorASVT = SizeClassAllocator64>; using PrimaryAllocator = PrimaryAllocatorASVT; -#elif defined(__x86_64__) || defined(__powerpc64__) || defined(__s390x__) -# if SANITIZER_FUCHSIA -const uptr kAllocatorSpace = ~(uptr)0; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -# elif defined(__powerpc64__) -const uptr kAllocatorSpace = 0xa0000000000ULL; -const uptr kAllocatorSize = 0x20000000000ULL; // 2T. -#elif defined(__s390x__) -const uptr kAllocatorSpace = 0x40000000000ULL; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -# else -const uptr kAllocatorSpace = 0x600000000000ULL; -const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -# endif +#else // !SANITIZER_CAN_USE_ALLOCATOR64 template -struct AP64 { // Allocator64 parameters. Deliberately using a short name. - static const uptr kSpaceBeg = kAllocatorSpace; - static const uptr kSpaceSize = kAllocatorSize; +struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; static const uptr kMetadataSize = sizeof(ChunkMetadata); - typedef DefaultSizeClassMap SizeClassMap; + typedef __sanitizer::CompactSizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = 20; + using AddressSpaceView = AddressSpaceViewTy; typedef NoOpMapUnmapCallback MapUnmapCallback; static const uptr kFlags = 0; - using AddressSpaceView = AddressSpaceViewTy; }; - template -using PrimaryAllocatorASVT = SizeClassAllocator64>; +using PrimaryAllocatorASVT = SizeClassAllocator32>; using PrimaryAllocator = PrimaryAllocatorASVT; -#endif +#endif // SANITIZER_CAN_USE_ALLOCATOR64 template using AllocatorASVT = CombinedAllocator>; diff --git a/compiler-rt/lib/lsan/lsan_common.h b/compiler-rt/lib/lsan/lsan_common.h index 3434beede8289..c5ea1eb606940 100644 --- a/compiler-rt/lib/lsan/lsan_common.h +++ b/compiler-rt/lib/lsan/lsan_common.h @@ -37,8 +37,7 @@ #elif defined(__i386__) && \ (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) #define CAN_SANITIZE_LEAKS 1 -#elif defined(__arm__) && \ - SANITIZER_LINUX && !SANITIZER_ANDROID +#elif defined(__arm__) && SANITIZER_LINUX && !SANITIZER_ANDROID #define CAN_SANITIZE_LEAKS 1 #elif SANITIZER_NETBSD || SANITIZER_FUCHSIA #define CAN_SANITIZE_LEAKS 1 @@ -50,10 +49,50 @@ namespace __sanitizer { class FlagParser; class ThreadRegistry; struct DTLS; -} +} // namespace __sanitizer namespace __lsan { +// The platform-specific allocator parameters are shared by both +// asan_allocator.h and lsan_allocator.h. +#if SANITIZER_CAN_USE_ALLOCATOR64 +#if SANITIZER_FUCHSIA +constexpr uptr kAllocatorSpace = ~(uptr)0; +constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +#elif defined(__powerpc64__) +constexpr uptr kAllocatorSpace = ~(uptr)0; +constexpr uptr kAllocatorSize = 0x20000000000ULL; // 2T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +#elif defined(__aarch64__) && SANITIZER_ANDROID +// Android needs to support 39, 42 and 48 bit VMA. +constexpr uptr kAllocatorSpace = ~(uptr)0; +constexpr uptr kAllocatorSize = 0x2000000000ULL; // 128G. +using AllocatorSizeClassMap = VeryCompactSizeClassMap; +#elif defined(__aarch64__) +// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA +// so no need to different values for different VMA. +constexpr uptr kAllocatorSpace = 0x10000000000ULL; +constexpr uptr kAllocatorSize = 0x10000000000ULL; // 3T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +#elif defined(__sparc__) +constexpr uptr kAllocatorSpace = ~(uptr)0; +constexpr uptr kAllocatorSize = 0x20000000000ULL; // 2T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +#elif SANITIZER_WINDOWS +// On Windows 64-bit there is no easy way to find a large enough fixed address +// space that is always available. Thus, a dynamically allocated address space +// is used instead (i.e. ~(uptr)0). +constexpr uptr kAllocatorSpace = ~(uptr)0; +constexpr uptr kAllocatorSize = 0x8000000000ULL; // 500G +using AllocatorSizeClassMap = DefaultSizeClassMap; +#else +constexpr uptr kAllocatorSpace = 0x600000000000ULL; +constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T. +using AllocatorSizeClassMap = DefaultSizeClassMap; +#endif +#endif // SANITIZER_CAN_USE_ALLOCATOR64 + // Chunk tags. enum ChunkTag { kDirectlyLeaked = 0, // default @@ -62,7 +101,7 @@ enum ChunkTag { kIgnored = 3 }; -const u32 kInvalidTid = (u32) -1; +const u32 kInvalidTid = (u32)-1; struct Flags { #define LSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; @@ -70,9 +109,7 @@ struct Flags { #undef LSAN_FLAG void SetDefaults(); - uptr pointer_alignment() const { - return use_unaligned ? 1 : sizeof(uptr); - } + uptr pointer_alignment() const { return use_unaligned ? 1 : sizeof(uptr); } }; extern Flags lsan_flags; @@ -139,14 +176,13 @@ struct CheckForLeaksParam { InternalMmapVector const *GetRootRegions(); void ScanRootRegion(Frontier *frontier, RootRegion const ®ion, uptr region_begin, uptr region_end, bool is_readable); -void ForEachExtraStackRangeCb(uptr begin, uptr end, void* arg); +void ForEachExtraStackRangeCb(uptr begin, uptr end, void *arg); // Run stoptheworld while holding any platform-specific locks, as well as the // allocator and thread registry locks. void LockStuffAndStopTheWorld(StopTheWorldCallback callback, - CheckForLeaksParam* argument); + CheckForLeaksParam *argument); -void ScanRangeForPointers(uptr begin, uptr end, - Frontier *frontier, +void ScanRangeForPointers(uptr begin, uptr end, Frontier *frontier, const char *region_type, ChunkTag tag); void ScanGlobalRange(uptr begin, uptr end, Frontier *frontier); @@ -260,6 +296,7 @@ class LsanMetadata { void set_tag(ChunkTag value); uptr requested_size() const; u32 stack_trace_id() const; + private: void *metadata_; }; @@ -267,14 +304,14 @@ class LsanMetadata { } // namespace __lsan extern "C" { -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char *__lsan_default_options(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char * +__lsan_default_options(); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -int __lsan_is_turned_off(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int +__lsan_is_turned_off(); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char *__lsan_default_suppressions(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char * +__lsan_default_suppressions(); } // extern "C" #endif // LSAN_COMMON_H From 5b0cfe93b6cdaf2e0383ed8edaa867430d7e6e4e Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 2 Oct 2020 18:12:57 -0700 Subject: [PATCH 444/544] Revert "[lsan] Share platform allocator settings between ASan and LSan" This reverts commit 1c897e9d72979730f7555e77dd54fe892a461637. It broke builds for 32-bit targets. Differential Revision: https://reviews.llvm.org/D88768 --- compiler-rt/lib/asan/asan_allocator.h | 51 +++++++++++++------ compiler-rt/lib/lsan/lsan_allocator.h | 51 ++++++++++++------- compiler-rt/lib/lsan/lsan_common.h | 71 +++++++-------------------- 3 files changed, 86 insertions(+), 87 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index c5f6de123a622..612799f90964a 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -15,20 +15,13 @@ #define ASAN_ALLOCATOR_H #include "asan_flags.h" -#include "asan_interceptors.h" #include "asan_internal.h" -#include "lsan/lsan_common.h" +#include "asan_interceptors.h" #include "sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_list.h" namespace __asan { -// These are defined in lsan_common.h because they are shared between the asan -// allocator and the standalone lsan allocator. -using __lsan::AllocatorSizeClassMap; -using __lsan::kAllocatorSize; -using __lsan::kAllocatorSpace; - enum AllocType { FROM_MALLOC = 1, // Memory block came from malloc, calloc, realloc, etc. FROM_NEW = 2, // Memory block came from operator new. @@ -104,9 +97,9 @@ AsanChunkView FindHeapChunkByAddress(uptr address); AsanChunkView FindHeapChunkByAllocBeg(uptr address); // List of AsanChunks with total size. -class AsanChunkFifoList : public IntrusiveList { +class AsanChunkFifoList: public IntrusiveList { public: - explicit AsanChunkFifoList(LinkerInitialized) {} + explicit AsanChunkFifoList(LinkerInitialized) { } AsanChunkFifoList() { clear(); } void Push(AsanChunk *n); void PushList(AsanChunkFifoList *q); @@ -116,7 +109,6 @@ class AsanChunkFifoList : public IntrusiveList { IntrusiveList::clear(); size_ = 0; } - private: uptr size_; }; @@ -126,9 +118,39 @@ struct AsanMapUnmapCallback { void OnUnmap(uptr p, uptr size) const; }; -using SizeClassMap = AllocatorSizeClassMap; - #if SANITIZER_CAN_USE_ALLOCATOR64 +# if SANITIZER_FUCHSIA +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +typedef DefaultSizeClassMap SizeClassMap; +# elif defined(__powerpc64__) +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x20000000000ULL; // 2T. +typedef DefaultSizeClassMap SizeClassMap; +# elif defined(__aarch64__) && SANITIZER_ANDROID +// Android needs to support 39, 42 and 48 bit VMA. +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x2000000000ULL; // 128G. +typedef VeryCompactSizeClassMap SizeClassMap; +# elif defined(__aarch64__) +// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA +// so no need to different values for different VMA. +const uptr kAllocatorSpace = 0x10000000000ULL; +const uptr kAllocatorSize = 0x10000000000ULL; // 3T. +typedef DefaultSizeClassMap SizeClassMap; +#elif defined(__sparc__) +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x20000000000ULL; // 2T. +typedef DefaultSizeClassMap SizeClassMap; +# elif SANITIZER_WINDOWS +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x8000000000ULL; // 500G +typedef DefaultSizeClassMap SizeClassMap; +# else +const uptr kAllocatorSpace = 0x600000000000ULL; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +typedef DefaultSizeClassMap SizeClassMap; +# endif template struct AP64 { // Allocator64 parameters. Deliberately using a short name. static const uptr kSpaceBeg = kAllocatorSpace; @@ -157,7 +179,7 @@ struct AP32 { static const uptr kFlags = 0; }; template -using PrimaryAllocatorASVT = SizeClassAllocator32>; +using PrimaryAllocatorASVT = SizeClassAllocator32 >; using PrimaryAllocator = PrimaryAllocatorASVT; #endif // SANITIZER_CAN_USE_ALLOCATOR64 @@ -173,7 +195,6 @@ struct AsanThreadLocalMallocStorage { uptr quarantine_cache[16]; AllocatorCache allocator_cache; void CommitBack(); - private: // These objects are allocated via mmap() and are zero-initialized. AsanThreadLocalMallocStorage() {} diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h index 7ee84e2cd5b95..17e13cd014ba4 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.h +++ b/compiler-rt/lib/lsan/lsan_allocator.h @@ -14,10 +14,10 @@ #ifndef LSAN_ALLOCATOR_H #define LSAN_ALLOCATOR_H -#include "lsan_common.h" #include "sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_internal_defs.h" +#include "lsan_common.h" namespace __lsan { @@ -28,7 +28,7 @@ void *Reallocate(const StackTrace &stack, void *p, uptr new_size, uptr alignment); uptr GetMallocUsableSize(const void *p); -template +template void ForEachChunk(const Callable &callback); void GetAllocatorCacheRange(uptr *begin, uptr *end); @@ -49,21 +49,8 @@ struct ChunkMetadata { u32 stack_trace_id; }; -#if SANITIZER_CAN_USE_ALLOCATOR64 -template -struct AP64 { // Allocator64 parameters. Deliberately using a short name. - static const uptr kSpaceBeg = kAllocatorSpace; - static const uptr kSpaceSize = kAllocatorSize; - static const uptr kMetadataSize = sizeof(ChunkMetadata); - typedef AllocatorSizeClassMap SizeClassMap; - typedef NoOpMapUnmapCallback MapUnmapCallback; - static const uptr kFlags = 0; - using AddressSpaceView = AddressSpaceViewTy; -}; -template -using PrimaryAllocatorASVT = SizeClassAllocator64>; -using PrimaryAllocator = PrimaryAllocatorASVT; -#else // !SANITIZER_CAN_USE_ALLOCATOR64 +#if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ + defined(__arm__) template struct AP32 { static const uptr kSpaceBeg = 0; @@ -78,7 +65,35 @@ struct AP32 { template using PrimaryAllocatorASVT = SizeClassAllocator32>; using PrimaryAllocator = PrimaryAllocatorASVT; -#endif // SANITIZER_CAN_USE_ALLOCATOR64 +#elif defined(__x86_64__) || defined(__powerpc64__) || defined(__s390x__) +# if SANITIZER_FUCHSIA +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +# elif defined(__powerpc64__) +const uptr kAllocatorSpace = 0xa0000000000ULL; +const uptr kAllocatorSize = 0x20000000000ULL; // 2T. +#elif defined(__s390x__) +const uptr kAllocatorSpace = 0x40000000000ULL; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +# else +const uptr kAllocatorSpace = 0x600000000000ULL; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +# endif +template +struct AP64 { // Allocator64 parameters. Deliberately using a short name. + static const uptr kSpaceBeg = kAllocatorSpace; + static const uptr kSpaceSize = kAllocatorSize; + static const uptr kMetadataSize = sizeof(ChunkMetadata); + typedef DefaultSizeClassMap SizeClassMap; + typedef NoOpMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; + using AddressSpaceView = AddressSpaceViewTy; +}; + +template +using PrimaryAllocatorASVT = SizeClassAllocator64>; +using PrimaryAllocator = PrimaryAllocatorASVT; +#endif template using AllocatorASVT = CombinedAllocator>; diff --git a/compiler-rt/lib/lsan/lsan_common.h b/compiler-rt/lib/lsan/lsan_common.h index c5ea1eb606940..3434beede8289 100644 --- a/compiler-rt/lib/lsan/lsan_common.h +++ b/compiler-rt/lib/lsan/lsan_common.h @@ -37,7 +37,8 @@ #elif defined(__i386__) && \ (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) #define CAN_SANITIZE_LEAKS 1 -#elif defined(__arm__) && SANITIZER_LINUX && !SANITIZER_ANDROID +#elif defined(__arm__) && \ + SANITIZER_LINUX && !SANITIZER_ANDROID #define CAN_SANITIZE_LEAKS 1 #elif SANITIZER_NETBSD || SANITIZER_FUCHSIA #define CAN_SANITIZE_LEAKS 1 @@ -49,50 +50,10 @@ namespace __sanitizer { class FlagParser; class ThreadRegistry; struct DTLS; -} // namespace __sanitizer +} namespace __lsan { -// The platform-specific allocator parameters are shared by both -// asan_allocator.h and lsan_allocator.h. -#if SANITIZER_CAN_USE_ALLOCATOR64 -#if SANITIZER_FUCHSIA -constexpr uptr kAllocatorSpace = ~(uptr)0; -constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T. -using AllocatorSizeClassMap = DefaultSizeClassMap; -#elif defined(__powerpc64__) -constexpr uptr kAllocatorSpace = ~(uptr)0; -constexpr uptr kAllocatorSize = 0x20000000000ULL; // 2T. -using AllocatorSizeClassMap = DefaultSizeClassMap; -#elif defined(__aarch64__) && SANITIZER_ANDROID -// Android needs to support 39, 42 and 48 bit VMA. -constexpr uptr kAllocatorSpace = ~(uptr)0; -constexpr uptr kAllocatorSize = 0x2000000000ULL; // 128G. -using AllocatorSizeClassMap = VeryCompactSizeClassMap; -#elif defined(__aarch64__) -// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA -// so no need to different values for different VMA. -constexpr uptr kAllocatorSpace = 0x10000000000ULL; -constexpr uptr kAllocatorSize = 0x10000000000ULL; // 3T. -using AllocatorSizeClassMap = DefaultSizeClassMap; -#elif defined(__sparc__) -constexpr uptr kAllocatorSpace = ~(uptr)0; -constexpr uptr kAllocatorSize = 0x20000000000ULL; // 2T. -using AllocatorSizeClassMap = DefaultSizeClassMap; -#elif SANITIZER_WINDOWS -// On Windows 64-bit there is no easy way to find a large enough fixed address -// space that is always available. Thus, a dynamically allocated address space -// is used instead (i.e. ~(uptr)0). -constexpr uptr kAllocatorSpace = ~(uptr)0; -constexpr uptr kAllocatorSize = 0x8000000000ULL; // 500G -using AllocatorSizeClassMap = DefaultSizeClassMap; -#else -constexpr uptr kAllocatorSpace = 0x600000000000ULL; -constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T. -using AllocatorSizeClassMap = DefaultSizeClassMap; -#endif -#endif // SANITIZER_CAN_USE_ALLOCATOR64 - // Chunk tags. enum ChunkTag { kDirectlyLeaked = 0, // default @@ -101,7 +62,7 @@ enum ChunkTag { kIgnored = 3 }; -const u32 kInvalidTid = (u32)-1; +const u32 kInvalidTid = (u32) -1; struct Flags { #define LSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; @@ -109,7 +70,9 @@ struct Flags { #undef LSAN_FLAG void SetDefaults(); - uptr pointer_alignment() const { return use_unaligned ? 1 : sizeof(uptr); } + uptr pointer_alignment() const { + return use_unaligned ? 1 : sizeof(uptr); + } }; extern Flags lsan_flags; @@ -176,13 +139,14 @@ struct CheckForLeaksParam { InternalMmapVector const *GetRootRegions(); void ScanRootRegion(Frontier *frontier, RootRegion const ®ion, uptr region_begin, uptr region_end, bool is_readable); -void ForEachExtraStackRangeCb(uptr begin, uptr end, void *arg); +void ForEachExtraStackRangeCb(uptr begin, uptr end, void* arg); // Run stoptheworld while holding any platform-specific locks, as well as the // allocator and thread registry locks. void LockStuffAndStopTheWorld(StopTheWorldCallback callback, - CheckForLeaksParam *argument); + CheckForLeaksParam* argument); -void ScanRangeForPointers(uptr begin, uptr end, Frontier *frontier, +void ScanRangeForPointers(uptr begin, uptr end, + Frontier *frontier, const char *region_type, ChunkTag tag); void ScanGlobalRange(uptr begin, uptr end, Frontier *frontier); @@ -296,7 +260,6 @@ class LsanMetadata { void set_tag(ChunkTag value); uptr requested_size() const; u32 stack_trace_id() const; - private: void *metadata_; }; @@ -304,14 +267,14 @@ class LsanMetadata { } // namespace __lsan extern "C" { -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char * -__lsan_default_options(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE +const char *__lsan_default_options(); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int -__lsan_is_turned_off(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE +int __lsan_is_turned_off(); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char * -__lsan_default_suppressions(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE +const char *__lsan_default_suppressions(); } // extern "C" #endif // LSAN_COMMON_H From 63fc8499f305be192f0f9a62c43e0fcfdbdb7607 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 2 Oct 2020 17:06:05 -0700 Subject: [PATCH 445/544] [X86] Add missing intrinsic test for aesdecwide128kl and aesdecwide256kl. Capture all output values in keylocker tests. NFC The aesdec/enc instructions produce a flag output and one or eight xmm regsiter outputs. The test were not capturing the xmm outputs. Also add nounwind to tests to remove .cfi directives --- llvm/test/CodeGen/X86/keylocker-intrinsics.ll | 334 +++++++++++++++--- 1 file changed, 282 insertions(+), 52 deletions(-) diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll index 472eed484a16e..d577ffd12e086 100644 --- a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll @@ -12,7 +12,9 @@ declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*) declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*) declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*) declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) { ; X64-LABEL: test_loadiwkey: @@ -31,7 +33,7 @@ entry: ret void } -define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) { +define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) nounwind { ; X64-LABEL: test_encodekey128_u32: ; X64: # %bb.0: # %entry ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 @@ -47,17 +49,9 @@ define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocaptu ; X32-LABEL: test_encodekey128_u32: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %ebx -; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: pushl %edi -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 20 -; X32-NEXT: .cfi_offset %esi, -20 -; X32-NEXT: .cfi_offset %edi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -73,13 +67,9 @@ define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocaptu ; X32-NEXT: vmovaps %xmm5, (%edx) ; X32-NEXT: vmovaps %xmm6, (%ecx) ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: popl %edi -; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl entry: %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key) @@ -99,7 +89,7 @@ entry: ret i32 %7 } -define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) { +define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) nounwind { ; X64-LABEL: test_encodekey256_u32: ; X64: # %bb.0: # %entry ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 @@ -115,17 +105,9 @@ define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_ ; X32-LABEL: test_encodekey256_u32: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %ebx -; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: pushl %edi -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 20 -; X32-NEXT: .cfi_offset %esi, -20 -; X32-NEXT: .cfi_offset %edi, -16 -; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -141,13 +123,9 @@ define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_ ; X32-NEXT: vmovaps %xmm4, (%edx) ; X32-NEXT: vmovaps %xmm5, (%ecx) ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: popl %edi -; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl entry: %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi) @@ -167,96 +145,126 @@ entry: ret i32 %7 } -define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h) { +define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) { ; X64-LABEL: test_mm_aesenc128kl_u8: ; X64: # %bb.0: # %entry ; X64-NEXT: aesenc128kl (%rdi), %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) ; X64-NEXT: retq ; ; X32-LABEL: test_mm_aesenc128kl_u8: ; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: aesenc128kl (%eax), %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: vmovaps %xmm0, (%ecx) ; X32-NEXT: retl entry: %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, i8* %h) - %1 = extractvalue { i8, <2 x i64> } %0, 0 - ret i8 %1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 } -define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, i8* %h) { +define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) { ; X64-LABEL: test_mm_aesdec128kl_u8: ; X64: # %bb.0: # %entry ; X64-NEXT: aesdec128kl (%rdi), %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) ; X64-NEXT: retq ; ; X32-LABEL: test_mm_aesdec128kl_u8: ; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: aesdec128kl (%eax), %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: vmovaps %xmm0, (%ecx) ; X32-NEXT: retl entry: %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, i8* %h) - %1 = extractvalue { i8, <2 x i64> } %0, 0 - ret i8 %1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 } -define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, i8* %h) { +define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) { ; X64-LABEL: test_mm_aesenc256kl_u8: ; X64: # %bb.0: # %entry ; X64-NEXT: aesenc256kl (%rdi), %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) ; X64-NEXT: retq ; ; X32-LABEL: test_mm_aesenc256kl_u8: ; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: aesenc256kl (%eax), %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: vmovaps %xmm0, (%ecx) ; X32-NEXT: retl entry: %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, i8* %h) - %1 = extractvalue { i8, <2 x i64> } %0, 0 - ret i8 %1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 } -define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, i8* %h) { +define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) { ; X64-LABEL: test_mm_aesdec256kl_u8: ; X64: # %bb.0: # %entry ; X64-NEXT: aesdec256kl (%rdi), %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) ; X64-NEXT: retq ; ; X32-LABEL: test_mm_aesdec256kl_u8: ; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: aesdec256kl (%eax), %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: vmovaps %xmm0, (%ecx) ; X32-NEXT: retl entry: %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, i8* %h) - %1 = extractvalue { i8, <2 x i64> } %0, 0 - ret i8 %1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 } -define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) { +define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind { ; X64-LABEL: test_mm_aesencwide128kl_u8: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx ; X64-NEXT: aesencwide128kl (%rdi) ; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm1, (%rcx) +; X64-NEXT: movaps %xmm1, (%r8) +; X64-NEXT: movaps %xmm1, (%r9) +; X64-NEXT: movaps %xmm1, (%rbx) +; X64-NEXT: movaps %xmm1, (%r11) +; X64-NEXT: movaps %xmm1, (%r10) +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; ; X32-LABEL: test_mm_aesencwide128kl_u8: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: andl $-16, %esp ; X32-NEXT: subl $16, %esp ; X32-NEXT: vmovaps 24(%ebp), %xmm3 @@ -266,31 +274,147 @@ define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x ; X32-NEXT: vmovaps 88(%ebp), %xmm7 ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: aesencwide128kl (%eax) +; X32-NEXT: movl 104(%ebp), %eax +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: movl 108(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 112(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 116(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 120(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 124(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 128(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 132(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) ; X32-NEXT: sete %al ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl entry: %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) - %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 - ret i8 %1 + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out0 + %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %out1 + %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %2, <2 x i64>* %out2 + %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %2, <2 x i64>* %out3 + %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %2, <2 x i64>* %out4 + %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %2, <2 x i64>* %out5 + %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + store <2 x i64> %2, <2 x i64>* %out6 + %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8 + store <2 x i64> %2, <2 x i64>* %out7 + %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %9 +} + +define i8 @test_mm_aesdecwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind { +; X64-LABEL: test_mm_aesdecwide128kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; X64-NEXT: aesdecwide128kl (%rdi) +; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm1, (%rcx) +; X64-NEXT: movaps %xmm1, (%r8) +; X64-NEXT: movaps %xmm1, (%r9) +; X64-NEXT: movaps %xmm1, (%rbx) +; X64-NEXT: movaps %xmm1, (%r11) +; X64-NEXT: movaps %xmm1, (%r10) +; X64-NEXT: popq %rbx +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesdecwide128kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: vmovaps 24(%ebp), %xmm3 +; X32-NEXT: vmovaps 40(%ebp), %xmm4 +; X32-NEXT: vmovaps 56(%ebp), %xmm5 +; X32-NEXT: vmovaps 72(%ebp), %xmm6 +; X32-NEXT: vmovaps 88(%ebp), %xmm7 +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: aesdecwide128kl (%eax) +; X32-NEXT: movl 104(%ebp), %eax +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: movl 108(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 112(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 116(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 120(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 124(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 128(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 132(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: sete %al +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +entry: + %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out0 + %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %out1 + %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %2, <2 x i64>* %out2 + %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %2, <2 x i64>* %out3 + %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %2, <2 x i64>* %out4 + %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %2, <2 x i64>* %out5 + %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + store <2 x i64> %2, <2 x i64>* %out6 + %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8 + store <2 x i64> %2, <2 x i64>* %out7 + %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %9 } -define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) { +define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind { ; X64-LABEL: test_mm_aesencwide256kl_u8: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx ; X64-NEXT: aesencwide256kl (%rdi) ; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm1, (%rcx) +; X64-NEXT: movaps %xmm1, (%r8) +; X64-NEXT: movaps %xmm1, (%r9) +; X64-NEXT: movaps %xmm1, (%rbx) +; X64-NEXT: movaps %xmm1, (%r11) +; X64-NEXT: movaps %xmm1, (%r10) +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; ; X32-LABEL: test_mm_aesencwide256kl_u8: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: andl $-16, %esp ; X32-NEXT: subl $16, %esp ; X32-NEXT: vmovaps 24(%ebp), %xmm3 @@ -300,13 +424,119 @@ define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x ; X32-NEXT: vmovaps 88(%ebp), %xmm7 ; X32-NEXT: movl 8(%ebp), %eax ; X32-NEXT: aesencwide256kl (%eax) +; X32-NEXT: movl 104(%ebp), %eax +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: movl 108(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 112(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 116(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 120(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 124(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 128(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 132(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) ; X32-NEXT: sete %al ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl entry: %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) - %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 - ret i8 %1 + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out0 + %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %out1 + %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %2, <2 x i64>* %out2 + %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %2, <2 x i64>* %out3 + %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %2, <2 x i64>* %out4 + %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %2, <2 x i64>* %out5 + %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + store <2 x i64> %2, <2 x i64>* %out6 + %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8 + store <2 x i64> %2, <2 x i64>* %out7 + %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %9 +} + +define i8 @test_mm_aesdecwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind { +; X64-LABEL: test_mm_aesdecwide256kl_u8: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; X64-NEXT: aesdecwide256kl (%rdi) +; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm1, (%rcx) +; X64-NEXT: movaps %xmm1, (%r8) +; X64-NEXT: movaps %xmm1, (%r9) +; X64-NEXT: movaps %xmm1, (%rbx) +; X64-NEXT: movaps %xmm1, (%r11) +; X64-NEXT: movaps %xmm1, (%r10) +; X64-NEXT: popq %rbx +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesdecwide256kl_u8: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: vmovaps 24(%ebp), %xmm3 +; X32-NEXT: vmovaps 40(%ebp), %xmm4 +; X32-NEXT: vmovaps 56(%ebp), %xmm5 +; X32-NEXT: vmovaps 72(%ebp), %xmm6 +; X32-NEXT: vmovaps 88(%ebp), %xmm7 +; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: aesdecwide256kl (%eax) +; X32-NEXT: movl 104(%ebp), %eax +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: movl 108(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 112(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 116(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 120(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 124(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 128(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 132(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: sete %al +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +entry: + %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out0 + %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %out1 + %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %2, <2 x i64>* %out2 + %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %2, <2 x i64>* %out3 + %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %2, <2 x i64>* %out4 + %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %2, <2 x i64>* %out5 + %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + store <2 x i64> %2, <2 x i64>* %out6 + %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8 + store <2 x i64> %2, <2 x i64>* %out7 + %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %9 } From 8ae4842669f1fee33d22cc3f0c895c7839d05aef Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 2 Oct 2020 18:19:51 -0700 Subject: [PATCH 446/544] [X86] Move MWAITX_DAG ISD opcode so it is not in the strict FP range. Add a comment to hopefully prevent anyone else from making the same mistake. --- llvm/lib/Target/X86/X86ISelLowering.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7c977ce9e3fa1..f8de2f7d0e79b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -710,6 +710,9 @@ namespace llvm { // For avx512-vp2intersect VP2INTERSECT, + // Mwaitx builtin is lowered to this if the base pointer needs saving. + MWAITX_DAG, + /// X86 strict FP compare instructions. STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCMPS, @@ -749,8 +752,8 @@ namespace llvm { STRICT_CVTPS2PH, STRICT_CVTPH2PS, - // Mwaitx builtin is lowered to this if the base pointer needs saving. - MWAITX_DAG, + // WARNING: Only add nodes here if they are stric FP nodes. Non-memory and + // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE. // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, From e25bf2592060e7751f8b14522c97081ce2047175 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 22 Sep 2020 01:01:16 -0700 Subject: [PATCH 447/544] [Driver] Move detectLibcxxIncludePath to ToolChain This helper method is useful even outside of Gnu toolchains, so move it to ToolChain so it can be reused in other toolchains such as Fuchsia. Differential Revision: https://reviews.llvm.org/D88452 --- clang/include/clang/Driver/ToolChain.h | 3 +++ clang/lib/Driver/ToolChain.cpp | 23 +++++++++++++++++++ clang/lib/Driver/ToolChains/Fuchsia.cpp | 4 ++-- clang/lib/Driver/ToolChains/Gnu.cpp | 22 +----------------- .../basic_fuchsia_tree/include/c++/v1/.keep | 0 5 files changed, 29 insertions(+), 23 deletions(-) create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/include/c++/v1/.keep diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 7495e08fe6e64..db4c4a7302325 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -575,6 +575,9 @@ class ToolChain { // given compilation arguments. virtual UnwindLibType GetUnwindLibType(const llvm::opt::ArgList &Args) const; + // Detect the highest available version of libc++ in base path. + virtual std::string detectLibcxxIncludePath(StringRef Base) const; + /// AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set /// the include paths to use for the given C++ standard library type. virtual void diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8991216da6765..8e98e32068808 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -924,6 +924,29 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs, } } +std::string ToolChain::detectLibcxxIncludePath(StringRef Base) const { + std::error_code EC; + int MaxVersion = 0; + std::string MaxVersionString; + for (llvm::vfs::directory_iterator LI = getVFS().dir_begin(Base, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef VersionText = llvm::sys::path::filename(LI->path()); + int Version; + if (VersionText[0] == 'v' && + !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { + if (Version > MaxVersion) { + MaxVersion = Version; + MaxVersionString = std::string(VersionText); + } + } + } + if (!MaxVersion) + return ""; + SmallString<128> P(Base); + llvm::sys::path::append(P, MaxVersionString); + return std::string(P.str()); +} + void ToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { // Header search paths should be handled by each of the subclasses. diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 781179be39a36..e5f23ee385559 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -319,8 +319,8 @@ void Fuchsia::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: { SmallString<128> P(getDriver().Dir); - llvm::sys::path::append(P, "..", "include", "c++", "v1"); - addSystemInclude(DriverArgs, CC1Args, P.str()); + llvm::sys::path::append(P, "..", "include", "c++"); + addSystemInclude(DriverArgs, CC1Args, detectLibcxxIncludePath(P.str())); break; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f3843685a522b..3778b6f297ed2 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2877,31 +2877,11 @@ void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, } } -static std::string DetectLibcxxIncludePath(llvm::vfs::FileSystem &vfs, - StringRef base) { - std::error_code EC; - int MaxVersion = 0; - std::string MaxVersionString; - for (llvm::vfs::directory_iterator LI = vfs.dir_begin(base, EC), LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef VersionText = llvm::sys::path::filename(LI->path()); - int Version; - if (VersionText[0] == 'v' && - !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { - if (Version > MaxVersion) { - MaxVersion = Version; - MaxVersionString = std::string(VersionText); - } - } - } - return MaxVersion ? (base + "/" + MaxVersionString).str() : ""; -} - void Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { auto AddIncludePath = [&](std::string Path) { - std::string IncludePath = DetectLibcxxIncludePath(getVFS(), Path); + std::string IncludePath = detectLibcxxIncludePath(Path); if (IncludePath.empty() || !getVFS().exists(IncludePath)) return false; addSystemInclude(DriverArgs, CC1Args, IncludePath); diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/include/c++/v1/.keep b/clang/test/Driver/Inputs/basic_fuchsia_tree/include/c++/v1/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d From a0a8f83718454186686f9c11db72408cc36482e3 Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Fri, 2 Oct 2020 16:30:39 -0500 Subject: [PATCH 448/544] [PATCH] Fix typo (NFC) --- clang/test/Driver/riscv-cpus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c index c22f6cce2cf60..15cd212e4fb40 100644 --- a/clang/test/Driver/riscv-cpus.c +++ b/clang/test/Driver/riscv-cpus.c @@ -28,7 +28,7 @@ // MCPU-ABI-SIFIVE-U54: "-target-feature" "+c" "-target-feature" "+64bit" // MCPU-ABI-SIFIVE-U54: "-target-abi" "lp64" -// march overwirte mcpu's default march +// march overwrite mcpu's default march // RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=sifive-e31 -march=rv32imc | FileCheck -check-prefix=MCPU-MARCH %s // MCPU-MARCH: "-nostdsysteminc" "-target-cpu" "sifive-e31" "-target-feature" "+m" "-target-feature" "+c" // MCPU-MARCH: "-target-abi" "ilp32" From e2dd86bbfcb4c1888d5e0ff6256a51c906e621cb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 2 Oct 2020 19:10:29 -0700 Subject: [PATCH 449/544] [X86] Key Locker instructions should use VR128 regclass not VR128X. --- llvm/lib/Target/X86/X86InstrKL.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td index 452410891bd86..77e011fe14d63 100644 --- a/llvm/lib/Target/X86/X86InstrKL.td +++ b/llvm/lib/Target/X86/X86InstrKL.td @@ -18,9 +18,9 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { let Uses = [XMM0, EAX] in { - def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128X:$src1, VR128X:$src2), + def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "loadiwkey\t{$src2, $src1|$src1, $src2}", - [(int_x86_loadiwkey EAX, XMM0, VR128X:$src1, VR128X:$src2)]>, T8XS; + [(int_x86_loadiwkey EAX, XMM0, VR128:$src1, VR128:$src2)]>, T8XS; } let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6] in { @@ -35,16 +35,16 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { let Constraints = "$src1 = $dst", Defs = [EFLAGS] in { - def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesenc128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; - def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesdec128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; - def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesenc256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; - def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, opaquemem:$src2), + def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), "aesdec256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; } From baaada39c261bb30702860d8736061e79a2c4420 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 3 Oct 2020 10:37:36 +0200 Subject: [PATCH 450/544] [MemCpyOpt] Remove unnecessary -dse from test (NFC) This one doesn't even have any dead stores to eliminate... --- llvm/test/Transforms/MemCpyOpt/invariant.start.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll index 1bab2f65799aa..a8a898778920d 100644 --- a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll +++ b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; MemCpy optimizations should take place even in presence of invariant.start -; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" From fbf818724f507ec4b034f4056c7ed4934f43e1f8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 3 Oct 2020 11:27:33 +0200 Subject: [PATCH 451/544] [MemCpyOpt] Make moveUp() a member method (NFC) So we don't have to pass through more parameters in the future. --- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 1 + .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 23 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index ea6f37192d5eb..5426482ff5263 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -69,6 +69,7 @@ class MemCpyOptPass : public PassInfoMixin { bool processByValArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); + bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index b8c0d20d03218..f14f3d4515dee 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -462,11 +462,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // It will lift the store and its argument + that anything that // may alias with these. // The method returns true if it was successful. -static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, - const LoadInst *LI) { +bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { // If the store alias this position, early bail out. MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (isModOrRefSet(AA.getModRefInfo(P, StoreLoc))) + if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc))) return false; // Keep track of the arguments of all instruction we plan to lift @@ -490,19 +489,19 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) { auto *C = &*I; - bool MayAlias = isModOrRefSet(AA.getModRefInfo(C, None)); + bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); bool NeedLift = false; if (Args.erase(C)) NeedLift = true; else if (MayAlias) { - NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) { - return isModOrRefSet(AA.getModRefInfo(C, ML)); + NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) { + return isModOrRefSet(AA->getModRefInfo(C, ML)); }); if (!NeedLift) - NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) { - return isModOrRefSet(AA.getModRefInfo(C, Call)); + NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) { + return isModOrRefSet(AA->getModRefInfo(C, Call)); }); } @@ -512,18 +511,18 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, if (MayAlias) { // Since LI is implicitly moved downwards past the lifted instructions, // none of them may modify its source. - if (isModSet(AA.getModRefInfo(C, LoadLoc))) + if (isModSet(AA->getModRefInfo(C, LoadLoc))) return false; else if (const auto *Call = dyn_cast(C)) { // If we can't lift this before P, it's game over. - if (isModOrRefSet(AA.getModRefInfo(P, Call))) + if (isModOrRefSet(AA->getModRefInfo(P, Call))) return false; Calls.push_back(Call); } else if (isa(C) || isa(C) || isa(C)) { // If we can't lift this before P, it's game over. auto ML = MemoryLocation::get(C); - if (isModOrRefSet(AA.getModRefInfo(P, ML))) + if (isModOrRefSet(AA->getModRefInfo(P, ML))) return false; MemLocs.push_back(ML); @@ -599,7 +598,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // position if nothing alias the store memory after this and the store // destination is not in the range. if (P && P != SI) { - if (!moveUp(*AA, SI, P, LI)) + if (!moveUp(SI, P, LI)) P = nullptr; } From 49e34e239b3632bd26d6c2ac648915bfed5b05fc Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 3 Oct 2020 12:25:34 +0100 Subject: [PATCH 452/544] [ARM] Test to show incorrect pointer info. NFC --- llvm/test/CodeGen/Thumb2/vmovdrroffset.ll | 51 +++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/vmovdrroffset.ll diff --git a/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll b/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll new file mode 100644 index 0000000000000..07656e60a568d --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s + +; This test was getting the offset of a store's pointer info incorrect, leading to the +; incorrect aliasing info an the store moving past a dependant load. + +define arm_aapcs_vfpcc double @zero(double %a, double %b, double %c) { +; CHECK-LABEL: zero: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vmov.f32 s16, s2 +; CHECK-NEXT: vmov.f32 s18, s0 +; CHECK-NEXT: vmov.f32 s17, s3 +; CHECK-NEXT: vmov.f32 s19, s1 +; CHECK-NEXT: bl sqrt +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: bl __aeabi_dadd +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #7] +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: eor r0, r0, #128 +; CHECK-NEXT: strb.w r0, [sp, #7] +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: bl __aeabi_dadd +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: ldr r1, [sp, #4] +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __aeabi_ddiv +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, pc} +entry: + %call = tail call nnan nsz arm_aapcs_vfpcc double @sqrt(double %a) + %0 = fadd nnan nsz double %call, %b + %sub3 = fneg nnan nsz double %0 + %mul4 = fmul nnan nsz double %a, 2.000000e+00 + %div = fdiv nnan nsz double %sub3, %mul4 + ret double %div +} + +declare arm_aapcs_vfpcc double @sqrt(double) From 0ce6d6b46eb7040283ad0800c5533672fbfb9bac Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sat, 3 Oct 2020 14:31:46 +0200 Subject: [PATCH 453/544] [Sema] List conversion validate character array. The function `TryListConversion` didn't properly validate the following part of the standard: Otherwise, if the parameter type is a character array [... ] and the initializer list has a single element that is an appropriately-typed string literal (8.5.2 [dcl.init.string]), the implicit conversion sequence is the identity conversion. This caused the following call to `f()` to be ambiguous. void f(int(&&)[1]); void f(unsigned(&&)[1]); void g(unsigned i) { f({i}); } This issue only occurs when the initializer list had one element. Differential Revision: https://reviews.llvm.org/D87561 --- clang/include/clang/Sema/Sema.h | 2 ++ clang/lib/Sema/SemaInit.cpp | 4 +++ clang/lib/Sema/SemaOverload.cpp | 25 +++++++------- clang/test/CXX/drs/dr14xx.cpp | 56 +++++++++++++++++++++++++++++++ clang/test/SemaObjCXX/overload.mm | 14 ++++++++ 5 files changed, 89 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9559075935d8d..3516d1c1b717b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3200,6 +3200,8 @@ class Sema final { bool CanPerformAggregateInitializationForOverloadResolution( const InitializedEntity &Entity, InitListExpr *From); + bool IsStringInit(Expr *Init, const ArrayType *AT); + bool CanPerformCopyInitialization(const InitializedEntity &Entity, ExprResult Init); ExprResult PerformCopyInitialization(const InitializedEntity &Entity, diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index a9f707b8cf203..751b785ce531e 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -141,6 +141,10 @@ static StringInitFailureKind IsStringInit(Expr *init, QualType declType, return IsStringInit(init, arrayType, Context); } +bool Sema::IsStringInit(Expr *Init, const ArrayType *AT) { + return ::IsStringInit(Init, AT, Context) == SIF_None; +} + /// Update the type of a string literal, including any surrounding parentheses, /// to match the type of the object which it is initializing. static void updateStringLiteralType(Expr *E, QualType Ty) { diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 95d110e754f45..0c252a488fea3 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -4984,18 +4984,19 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, InOverloadResolution, AllowObjCWritebackConversion); } - // FIXME: Check the other conditions here: array of character type, - // initializer is a string literal. - if (ToType->isArrayType()) { - InitializedEntity Entity = - InitializedEntity::InitializeParameter(S.Context, ToType, - /*Consumed=*/false); - if (S.CanPerformCopyInitialization(Entity, From)) { - Result.setStandard(); - Result.Standard.setAsIdentityConversion(); - Result.Standard.setFromType(ToType); - Result.Standard.setAllToTypes(ToType); - return Result; + + if (const auto *AT = S.Context.getAsArrayType(ToType)) { + if (S.IsStringInit(From->getInit(0), AT)) { + InitializedEntity Entity = + InitializedEntity::InitializeParameter(S.Context, ToType, + /*Consumed=*/false); + if (S.CanPerformCopyInitialization(Entity, From)) { + Result.setStandard(); + Result.Standard.setAsIdentityConversion(); + Result.Standard.setFromType(ToType); + Result.Standard.setAllToTypes(ToType); + return Result; + } } } } diff --git a/clang/test/CXX/drs/dr14xx.cpp b/clang/test/CXX/drs/dr14xx.cpp index 50b0396a4b798..13bc0d148d773 100644 --- a/clang/test/CXX/drs/dr14xx.cpp +++ b/clang/test/CXX/drs/dr14xx.cpp @@ -334,6 +334,22 @@ namespace dr1467 { // dr1467: 3.7 c++11 X x; X x2{x}; + + void f1(int); // expected-note {{candidate function}} + void f1(std::initializer_list) = delete; // expected-note {{candidate function has been explicitly deleted}} + void g1() { f1({42}); } // expected-error {{call to deleted function 'f1'}} + + template + struct Pair { + Pair(T, U); + }; + struct String { + String(const char *); + }; + + void f2(Pair); // expected-note {{candidate function}} + void f2(std::initializer_list) = delete; // expected-note {{candidate function has been explicitly deleted}} + void g2() { f2({"foo", "bar"}); } // expected-error {{call to deleted function 'f2'}} } // dr_example namespace nonaggregate { @@ -379,6 +395,46 @@ namespace dr1467 { // dr1467: 3.7 c++11 struct Value { Value(Pair); Value(TwoPairs); }; void f() { Value{{{1,2},{3,4}}}; } } + namespace NonAmbiguous { + // The original implementation made this case ambigious due to the special + // handling of one element initialization lists. + void f(int(&&)[1]); + void f(unsigned(&&)[1]); + + void g(unsigned i) { + f({i}); + } + } // namespace NonAmbiguous + +#if __cplusplus >= 201103L + namespace StringLiterals { + // When the array size is 4 the call will attempt to bind an lvalue to an + // rvalue and fail. Therefore #2 will be called. (rsmith will bring this + // issue to CWG) + void f(const char(&&)[4]); // expected-note 5 {{no known conversion}} + void f(const char(&&)[5]) = delete; // expected-note 2 {{candidate function has been explicitly deleted}} expected-note 3 {{no known conversion}} + void f(const wchar_t(&&)[4]); // expected-note 5 {{no known conversion}} + void f(const wchar_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}} expected-note 4 {{no known conversion}} +#if __cplusplus >= 202002L + void f2(const char8_t(&&)[4]); // expected-note {{no known conversion}} + void f2(const char8_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}} +#endif + void f(const char16_t(&&)[4]); // expected-note 5 {{no known conversion}} + void f(const char16_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}} expected-note 4 {{no known conversion}} + void f(const char32_t(&&)[4]); // expected-note 5 {{no known conversion}} + void f(const char32_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}} expected-note 4 {{no known conversion}} + void g() { + f({"abc"}); // expected-error {{call to deleted function 'f'}} + f({((("abc")))}); // expected-error {{call to deleted function 'f'}} + f({L"abc"}); // expected-error {{call to deleted function 'f'}} +#if __cplusplus >= 202002L + f2({u8"abc"}); // expected-error {{call to deleted function 'f2'}} +#endif + f({uR"(abc)"}); // expected-error {{call to deleted function 'f'}} + f({(UR"(abc)")}); // expected-error {{call to deleted function 'f'}} + } + } // namespace StringLiterals +#endif } // dr1467 namespace dr1490 { // dr1490: 3.7 c++11 diff --git a/clang/test/SemaObjCXX/overload.mm b/clang/test/SemaObjCXX/overload.mm index f3c06b4f22883..d6485b3ac28c6 100644 --- a/clang/test/SemaObjCXX/overload.mm +++ b/clang/test/SemaObjCXX/overload.mm @@ -201,3 +201,17 @@ void test(NSDictionary *d1, NSDictionary *d2, NSMutableDictionary Date: Sat, 3 Oct 2020 16:01:48 +0300 Subject: [PATCH 454/544] [NFCI][clang-tidy] FunctionCognitiveComplexityCheck::check(): try to fix windows arm build bots http://lab.llvm.org:8011/builders/llvm-clang-win-x-armv7l/builds/1482/steps/build-llvm-project/logs/stdio http://lab.llvm.org:8011/builders/llvm-clang-win-x-aarch64/builds/3285/steps/build-llvm-project/logs/stdio --- .../clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp index 548aec7543ac9..96fe9a2e29a4f 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp @@ -533,7 +533,7 @@ void FunctionCognitiveComplexityCheck::check( // Increase, on the other hand, can be 0. diag(Detail.Loc, Msgs[MsgId], DiagnosticIDs::Note) - << Increase << Detail.Nesting << 1 + Detail.Nesting; + << (unsigned)Increase << (unsigned)Detail.Nesting << 1 + Detail.Nesting; } } From 3aa93f690b097257e9a2e48b133c4f413bc3ed92 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 3 Oct 2020 14:52:25 +0100 Subject: [PATCH 455/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - support for 'partial' bswap patterns (PR47191) (Reapplied) If we're bswap'ing some bytes and zero'ing the remainder we can perform this as a bswap+mask which helps us match 'partial' bswaps as a first step towards folding into a more complex bswap pattern. Reapplied with early-out if recognizeBSwapOrBitReverseIdiom collects a source wider than the result type. Differential Revision: https://reviews.llvm.org/D88578 --- .../InstCombine/InstCombineAndOrXor.cpp | 35 ++--- llvm/lib/Transforms/Utils/Local.cpp | 43 +++++- llvm/test/Transforms/InstCombine/bswap.ll | 123 +++--------------- 3 files changed, 66 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index cbc3f5a2532f7..edb2dc8881c7b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2046,29 +2046,18 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) { Op1 = Ext->getOperand(0); // (A | B) | C and A | (B | C) -> bswap if possible. - bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())); - - // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. - bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value())); - - // (A & B) | (C & D) -> bswap if possible. - bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value())); - - // (A << B) | (C & D) -> bswap if possible. - // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a - // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935, - // C2 = 8 for i32). - // This pattern can occur when the operands of the 'or' are not canonicalized - // for some reason (not having only one use, for example). - bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value()))) || - (match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value()))); - - if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh) + bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); + + // (A >> B) | C and (A << B) | C -> bswap if possible. + bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || + match(Op1, m_LogicalShift(m_Value(), m_Value())); + + // (A & B) | C and A | (B & C) -> bswap if possible. + bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || + match(Op1, m_And(m_Value(), m_Value())); + + if (!OrWithOrs && !OrWithShifts && !OrWithAnds) return nullptr; SmallVector Insts; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0fd0dfa24ce96..1c4cbc7783208 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2940,6 +2940,24 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } + // BSWAP - most likely due to us previous matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift // amount (modulo). // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -3029,13 +3047,22 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size()); } + // Check BitProvenance hasn't found a source larger than the result type. + unsigned DemandedBW = DemandedTy->getBitWidth(); + if (DemandedBW > ITy->getBitWidth()) + return false; + // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. - unsigned DemandedBW = DemandedTy->getBitWidth(); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; bool OKForBitReverse = MatchBitReversals; for (unsigned BitIdx = 0; (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, DemandedBW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], @@ -3061,12 +3088,18 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( Provider = Trunc; } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } // We may need to zeroextend back to the result type. - if (ITy != CI->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + if (ITy != Result->getType()) { + auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index aac34178efd46..d6f0792504887 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -534,14 +534,8 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { define i32 @partial_bswap(i32 %x) { ; CHECK-LABEL: @partial_bswap( -; CHECK-NEXT: [[X3:%.*]] = shl i32 [[X:%.*]], 24 -; CHECK-NEXT: [[A2:%.*]] = shl i32 [[X]], 8 -; CHECK-NEXT: [[X2:%.*]] = and i32 [[A2]], 16711680 -; CHECK-NEXT: [[X32:%.*]] = or i32 [[X3]], [[X2]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[X]], -65536 -; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[T1]]) -; CHECK-NEXT: [[R:%.*]] = or i32 [[X32]], [[T2]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x3 = shl i32 %x, 24 %a2 = shl i32 %x, 8 @@ -578,10 +572,9 @@ declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -606,13 +599,9 @@ define i64 @bswap_and_mask_1(i64 %0) { define i64 @bswap_and_mask_2(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: ret i64 [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -735,28 +724,8 @@ define i32 @funnel_binary(i32 %abcd) { define i64 @PR47191_problem1(i64 %0) { ; CHECK-LABEL: @PR47191_problem1( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -784,28 +753,8 @@ define i64 @PR47191_problem1(i64 %0) { define i64 @PR47191_problem2(i64 %0) { ; CHECK-LABEL: @PR47191_problem2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -833,28 +782,8 @@ define i64 @PR47191_problem2(i64 %0) { define i64 @PR47191_problem3(i64 %0) { ; CHECK-LABEL: @PR47191_problem3( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -882,28 +811,8 @@ define i64 @PR47191_problem3(i64 %0) { define i64 @PR47191_problem4(i64 %0) { ; CHECK-LABEL: @PR47191_problem4( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 From 07028cd5dbb8417fb41121a7e75290fab00f65fc Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Sat, 3 Oct 2020 10:08:44 -0400 Subject: [PATCH 456/544] modernize-use-trailing-return-type fix for PR44206 Prevent rewrite when an unqualified id in a typedef type collides with a function argument name. Fixes PR44206. --- .../modernize/UseTrailingReturnTypeCheck.cpp | 4 ++++ .../checkers/modernize-use-trailing-return-type.cpp | 13 +++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp index b66e24d58b2f6..ff2f62e6545b4 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp @@ -66,6 +66,10 @@ struct UnqualNameVisitor : public RecursiveASTVisitor { ->getName())) return false; break; + case TypeLoc::Typedef: + if (VisitUnqualName( + TL.getAs().getTypePtr()->getDecl()->getName())) + return false; default: break; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-trailing-return-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-trailing-return-type.cpp index d5087b598f29a..d9efc006b22ef 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-trailing-return-type.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-trailing-return-type.cpp @@ -9,10 +9,16 @@ namespace std { class string; - class ostream; + template + class basic_ostream; + + using ostream = basic_ostream; template auto declval() -> T; + + template + class tuple; } // @@ -527,6 +533,10 @@ std::array j6(unsigned Size); std::array j8(unsigned Size); // CHECK-MESSAGES: :[[@LINE-1]]:44: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}}std::array j8(unsigned Size);{{$}} +using std::ostream; +std::tuple& operator<<(ostream& ostream, float i); +// CHECK-MESSAGES: :[[@LINE-1]]:40: warning: use a trailing return type for this function [modernize-use-trailing-return-type] +// CHECK-FIXES: {{^}}std::tuple& operator<<(ostream& ostream, float i);{{$}} class CC { int Object; @@ -552,7 +562,6 @@ Object DD::g() { // bug 44206, no rewrite should happen due to collision with parameter name // -using std::ostream; ostream& operator<<(ostream& ostream, int i); // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}}ostream& operator<<(ostream& ostream, int i);{{$}} From 35a2a042ddd127205455fc26ae516ccc513fd5cf Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Fri, 2 Oct 2020 09:53:35 +0100 Subject: [PATCH 457/544] [flang][NFC] Fix build errors for clang-10 This patch fixes one worning. Since Flang sets `-Werror`, that's sufficient for a build to fail. As per flang/README.md, Clang-10 is one of the officially supported compilers. Differential Revision: https://reviews.llvm.org/D88723 --- flang/lib/Lower/OpenACC.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index e1fb724fb92e1..b39fe9b695f83 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -99,7 +99,6 @@ static void genACC(Fortran::lower::AbstractConverter &converter, if (loopDirective.v == llvm::acc::ACCD_loop) { auto &firOpBuilder = converter.getFirOpBuilder(); auto currentLocation = converter.getCurrentLocation(); - llvm::ArrayRef argTy; // Add attribute extracted from clauses. const auto &accClauseList = From 347fd9955af3fff2622d8349a59974ecc2237ec1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 3 Oct 2020 15:29:05 +0100 Subject: [PATCH 458/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - use generic CreateIntegerCast Try to appease buildbots breakages due to D88578 --- llvm/lib/Transforms/Utils/Local.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 1c4cbc7783208..d17ce2fb47c80 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3083,7 +3083,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( // We may need to truncate the provider. if (DemandedTy != Provider->getType()) { auto *Trunc = - CastInst::Create(Instruction::Trunc, Provider, DemandedTy, "trunc", I); + CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I); InsertedInsts.push_back(Trunc); Provider = Trunc; } @@ -3099,7 +3099,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( // We may need to zeroextend back to the result type. if (ITy != Result->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); + auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I); InsertedInsts.push_back(ExtInst); } From 089e628b61f929ccd26565cd4118395f0a0273c3 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sat, 3 Oct 2020 11:10:26 -0400 Subject: [PATCH 459/544] Add a break statement to appease the build bots; NFC --- .../clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp index ff2f62e6545b4..bbb1e8c65a4f1 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp @@ -70,6 +70,7 @@ struct UnqualNameVisitor : public RecursiveASTVisitor { if (VisitUnqualName( TL.getAs().getTypePtr()->getDecl()->getName())) return false; + break; default: break; } From aacfe2be53d441d256091b2b495875a69fc2f285 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 3 Oct 2020 16:26:29 +0100 Subject: [PATCH 460/544] [InstCombine] recognizeBSwapOrBitReverseIdiom - add vector support Add basic vector handling to recognizeBSwapOrBitReverseIdiom/collectBitParts - this works at the element level, all vector element operations must match (splat constants etc.) and there is no cross-element support (insert/extract/shuffle etc.). --- llvm/lib/Transforms/Utils/Local.cpp | 24 +++++---- llvm/test/Transforms/InstCombine/bswap.ll | 59 +++++------------------ 2 files changed, 27 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index d17ce2fb47c80..eea347aa8fe6a 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2803,7 +2803,7 @@ struct BitPart { /// Analyze the specified subexpression and see if it is capable of providing /// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in /// the output of the expression came from a corresponding bit in some other /// value. This function is recursive, and the end result is a mapping of /// bitnumber to bitnumber. It is the caller's responsibility to validate that @@ -2815,6 +2815,10 @@ struct BitPart { /// BitPart is returned with Provider set to %X and Provenance[24-31] set to /// [0-7]. /// +/// For vector types, all analysis is performed at the per-element level. No +/// cross-element analysis is supported (shuffle/insertion/reduction), and all +/// constant masks must be splatted across all elements. +/// /// To avoid revisiting values, the BitPart results are memoized into the /// provided map. To avoid unnecessary copying of BitParts, BitParts are /// constructed in-place in the \c BPS map. Because of this \c BPS needs to @@ -3019,14 +3023,14 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( return false; if (!MatchBSwaps && !MatchBitReversals) return false; - IntegerType *ITy = dyn_cast(I->getType()); - if (!ITy || ITy->getBitWidth() > 128) - return false; // Can't do vectors or integers > 128 bits. + Type *ITy = I->getType(); + if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) + return false; // Can't do integer/elements > 128 bits. - IntegerType *DemandedTy = ITy; + Type *DemandedTy = ITy; if (I->hasOneUse()) if (auto *Trunc = dyn_cast(I->user_back())) - DemandedTy = cast(Trunc->getType()); + DemandedTy = Trunc->getType(); // Try to find all the pieces corresponding to the bswap. std::map> BPS; @@ -3044,12 +3048,14 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( BitProvenance = BitProvenance.drop_back(); if (BitProvenance.empty()) return false; // TODO - handle null value? - DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size()); + DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size()); + if (auto *IVecTy = dyn_cast(ITy)) + DemandedTy = VectorType::get(DemandedTy, IVecTy); } // Check BitProvenance hasn't found a source larger than the result type. - unsigned DemandedBW = DemandedTy->getBitWidth(); - if (DemandedBW > ITy->getBitWidth()) + unsigned DemandedBW = DemandedTy->getScalarSizeInBits(); + if (DemandedBW > ITy->getScalarSizeInBits()) return false; // Now, is the bit permutation correct for a bswap or a bitreverse? We can diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index d6f0792504887..effbc66499c02 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -22,15 +22,7 @@ define i32 @test1(i32 %i) { define <2 x i32> @test1_vector(<2 x i32> %i) { ; CHECK-LABEL: @test1_vector( -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[I:%.*]], -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[I]], -; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], -; CHECK-NEXT: [[T5:%.*]] = or <2 x i32> [[T1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = shl <2 x i32> [[I]], -; CHECK-NEXT: [[T8:%.*]] = and <2 x i32> [[T7]], -; CHECK-NEXT: [[T9:%.*]] = or <2 x i32> [[T5]], [[T8]] -; CHECK-NEXT: [[T11:%.*]] = shl <2 x i32> [[I]], -; CHECK-NEXT: [[T12:%.*]] = or <2 x i32> [[T9]], [[T11]] +; CHECK-NEXT: [[T12:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[I:%.*]]) ; CHECK-NEXT: ret <2 x i32> [[T12]] ; %t1 = lshr <2 x i32> %i, @@ -64,15 +56,7 @@ define i32 @test2(i32 %arg) { define <2 x i32> @test2_vector(<2 x i32> %arg) { ; CHECK-LABEL: @test2_vector( -; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], -; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], -; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], -; CHECK-NEXT: [[T6:%.*]] = or <2 x i32> [[T2]], [[T5]] -; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], -; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], -; CHECK-NEXT: [[T10:%.*]] = or <2 x i32> [[T6]], [[T9]] -; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], -; CHECK-NEXT: [[T14:%.*]] = or <2 x i32> [[T10]], [[T12]] +; CHECK-NEXT: [[T14:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[ARG:%.*]]) ; CHECK-NEXT: ret <2 x i32> [[T14]] ; %t2 = shl <2 x i32> %arg, @@ -225,15 +209,7 @@ define i32 @test6(i32 %x) nounwind readnone { define <2 x i32> @test6_vector(<2 x i32> %x) nounwind readnone { ; CHECK-LABEL: @test6_vector( -; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X]], -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X]], -; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = or <2 x i32> [[X_MASK]], [[T]] -; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T3]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = shl <2 x i32> [[T4]], -; CHECK-NEXT: [[T6:%.*]] = lshr <2 x i32> [[X]], -; CHECK-NEXT: [[T7:%.*]] = or <2 x i32> [[T5]], [[T6]] +; CHECK-NEXT: [[T7:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) ; CHECK-NEXT: ret <2 x i32> [[T7]] ; %t = shl <2 x i32> %x, @@ -381,12 +357,9 @@ define i16 @test10(i32 %a) { define <2 x i16> @test10_vector(<2 x i32> %a) { ; CHECK-LABEL: @test10_vector( -; CHECK-NEXT: [[SHR1:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[SHR1]], -; CHECK-NEXT: [[AND2:%.*]] = shl <2 x i32> [[A]], -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[AND1]], [[AND2]] -; CHECK-NEXT: [[CONV:%.*]] = trunc <2 x i32> [[OR]] to <2 x i16> -; CHECK-NEXT: ret <2 x i16> [[CONV]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16> +; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]]) +; CHECK-NEXT: ret <2 x i16> [[REV]] ; %shr1 = lshr <2 x i32> %a, %and1 = and <2 x i32> %shr1, @@ -457,12 +430,10 @@ define i64 @PR39793_bswap_u64_as_u16(i64 %0) { define <2 x i64> @PR39793_bswap_u64_as_u16_vector(<2 x i64> %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u16_vector( -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP3]], [[TMP5]] -; CHECK-NEXT: ret <2 x i64> [[TMP6]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[TMP0:%.*]] to <2 x i16> +; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[REV]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %2 = lshr <2 x i64> %0, %3 = and <2 x i64> %2, @@ -550,14 +521,8 @@ declare i32 @llvm.bswap.i32(i32) define <2 x i32> @partial_bswap_vector(<2 x i32> %x) { ; CHECK-LABEL: @partial_bswap_vector( -; CHECK-NEXT: [[X3:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[A2:%.*]] = shl <2 x i32> [[X]], -; CHECK-NEXT: [[X2:%.*]] = and <2 x i32> [[A2]], -; CHECK-NEXT: [[X32:%.*]] = or <2 x i32> [[X3]], [[X2]] -; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[X]], -; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[T1]]) -; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[X32]], [[T2]] -; CHECK-NEXT: ret <2 x i32> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %x3 = shl <2 x i32> %x, %a2 = shl <2 x i32> %x, From 7feafa0286f1f5e059d70a9a9f4168f32db3b444 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 3 Oct 2020 16:47:50 +0100 Subject: [PATCH 461/544] [ARM] Fix pointer offset when splitting stores from VMOVDRR We were not accounting for the pointer offset when splitting a store from a VMOVDRR node, which could lead to incorrect aliasing info. In this case it is the fneg via integer arithmetic that gives us a store->load pair that we started getting wrong. Differential Revision: https://reviews.llvm.org/D88653 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 6 +++--- llvm/test/CodeGen/Thumb2/vmovdrroffset.ll | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 774f057b625b9..798ecf2487637 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14546,15 +14546,15 @@ static SDValue PerformSTORECombine(SDNode *N, SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore( St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0), - BasePtr, St->getPointerInfo(), St->getAlignment(), + BasePtr, St->getPointerInfo(), St->getOriginalAlign(), St->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(isBigEndian ? 0 : 1), - OffsetPtr, St->getPointerInfo(), - std::min(4U, St->getAlignment() / 2), + OffsetPtr, St->getPointerInfo().getWithOffset(4), + St->getOriginalAlign(), St->getMemOperand()->getFlags()); } diff --git a/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll b/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll index 07656e60a568d..9d0c9c0ed35cd 100644 --- a/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll +++ b/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll @@ -21,9 +21,9 @@ define arm_aapcs_vfpcc double @zero(double %a, double %b, double %c) { ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: bl __aeabi_dadd +; CHECK-NEXT: str r1, [sp, #4] ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: ldrb.w r0, [sp, #7] -; CHECK-NEXT: str r1, [sp, #4] ; CHECK-NEXT: eor r0, r0, #128 ; CHECK-NEXT: strb.w r0, [sp, #7] ; CHECK-NEXT: vmov r0, r1, d9 From 78530ce65375fa02bc96019e5cc9d73db8adaca4 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Sat, 3 Oct 2020 08:53:43 -0700 Subject: [PATCH 462/544] Add indented raw_ostream class Class simplifies keeping track of the indentation while emitting. For every new line the current indentation is simply prefixed (if not at start of line, then it just emits as normal). Add a simple Region helper that makes it easy to have the C++ scope match the emitted scope. Use this in op doc generator and rewrite generator. Differential Revision: https://reviews.llvm.org/D84107 --- mlir/include/mlir/Support/IndentedOstream.h | 102 +++++++ mlir/lib/Support/CMakeLists.txt | 8 + mlir/lib/Support/IndentedOstream.cpp | 65 +++++ mlir/tools/mlir-tblgen/CMakeLists.txt | 1 + mlir/tools/mlir-tblgen/OpDocGen.cpp | 40 +-- mlir/tools/mlir-tblgen/RewriterGen.cpp | 250 +++++++++--------- mlir/unittests/Support/CMakeLists.txt | 6 + .../unittests/Support/IndentedOstreamTest.cpp | 110 ++++++++ 8 files changed, 416 insertions(+), 166 deletions(-) create mode 100644 mlir/include/mlir/Support/IndentedOstream.h create mode 100644 mlir/lib/Support/IndentedOstream.cpp create mode 100644 mlir/unittests/Support/CMakeLists.txt create mode 100644 mlir/unittests/Support/IndentedOstreamTest.cpp diff --git a/mlir/include/mlir/Support/IndentedOstream.h b/mlir/include/mlir/Support/IndentedOstream.h new file mode 100644 index 0000000000000..20161c1f3898f --- /dev/null +++ b/mlir/include/mlir/Support/IndentedOstream.h @@ -0,0 +1,102 @@ +//===- IndentedOstream.h - raw ostream wrapper to indent --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// raw_ostream subclass that keeps track of indentation for textual output +// where indentation helps readability. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_SUPPORT_INDENTEDOSTREAM_H_ +#define MLIR_SUPPORT_INDENTEDOSTREAM_H_ + +#include "mlir/Support/LLVM.h" +#include "llvm/Support/raw_ostream.h" + +namespace mlir { + +/// raw_ostream subclass that simplifies indention a sequence of code. +class raw_indented_ostream : public raw_ostream { +public: + explicit raw_indented_ostream(llvm::raw_ostream &os) : os(os) { + SetUnbuffered(); + } + + /// Simple RAII struct to use to indentation around entering/exiting region. + struct DelimitedScope { + explicit DelimitedScope(raw_indented_ostream &os, StringRef open = "", + StringRef close = "") + : os(os), open(open), close(close) { + os << open; + os.indent(); + } + ~DelimitedScope() { + os.unindent(); + os << close; + } + + raw_indented_ostream &os; + + private: + llvm::StringRef open, close; + }; + + /// Returns DelimitedScope. + DelimitedScope scope(StringRef open = "", StringRef close = "") { + return DelimitedScope(*this, open, close); + } + + /// Re-indents by removing the leading whitespace from the first non-empty + /// line from every line of the the string, skipping over empty lines at the + /// start. + raw_indented_ostream &reindent(StringRef str); + + /// Increases the indent and returning this raw_indented_ostream. + raw_indented_ostream &indent() { + currentIndent += indentSize; + return *this; + } + + /// Decreases the indent and returning this raw_indented_ostream. + raw_indented_ostream &unindent() { + currentIndent = std::max(0, currentIndent - indentSize); + return *this; + } + + /// Emits whitespace and sets the indendation for the stream. + raw_indented_ostream &indent(int with) { + os.indent(with); + atStartOfLine = false; + currentIndent = with; + return *this; + } + +private: + void write_impl(const char *ptr, size_t size) override; + + /// Return the current position within the stream, not counting the bytes + /// currently in the buffer. + uint64_t current_pos() const override { return os.tell(); } + + /// Constant indent added/removed. + static constexpr int indentSize = 2; + + // Tracker for current indentation. + int currentIndent = 0; + + // The leading whitespace of the string being printed, if reindent is used. + int leadingWs = 0; + + // Tracks whether at start of line and so indent is required or not. + bool atStartOfLine = true; + + // The underlying raw_ostream. + raw_ostream &os; +}; + +} // namespace mlir +#endif // MLIR_SUPPORT_INDENTEDOSTREAM_H_ diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt index bdba990571721..16584e0821099 100644 --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_OPTIONAL_SOURCES FileUtilities.cpp + IndentedOstream.cpp MlirOptMain.cpp StorageUniquer.cpp ToolUtilities.cpp @@ -27,3 +28,10 @@ add_mlir_library(MLIROptLib MLIRParser MLIRSupport ) + +# This doesn't use add_mlir_library as it is used in mlir-tblgen and else +# mlir-tblgen ends up depending on mlir-generic-headers. +add_llvm_library(MLIRSupportIdentedOstream + IndentedOstream.cpp + + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Support) diff --git a/mlir/lib/Support/IndentedOstream.cpp b/mlir/lib/Support/IndentedOstream.cpp new file mode 100644 index 0000000000000..bb3feef6c4458 --- /dev/null +++ b/mlir/lib/Support/IndentedOstream.cpp @@ -0,0 +1,65 @@ +//===- IndentedOstream.cpp - raw ostream wrapper to indent ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// raw_ostream subclass that keeps track of indentation for textual output +// where indentation helps readability. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/IndentedOstream.h" + +using namespace mlir; + +raw_indented_ostream &mlir::raw_indented_ostream::reindent(StringRef str) { + StringRef remaining = str; + // Find leading whitespace indent. + while (!remaining.empty()) { + auto split = remaining.split('\n'); + size_t indent = split.first.find_first_not_of(" \t"); + if (indent != StringRef::npos) { + leadingWs = indent; + break; + } + remaining = split.second; + } + // Print, skipping the empty lines. + *this << remaining; + leadingWs = 0; + return *this; +} + +void mlir::raw_indented_ostream::write_impl(const char *ptr, size_t size) { + StringRef str(ptr, size); + // Print out indented. + auto print = [this](StringRef str) { + if (atStartOfLine) + os.indent(currentIndent) << str.substr(leadingWs); + else + os << str.substr(leadingWs); + }; + + while (!str.empty()) { + size_t idx = str.find('\n'); + if (idx == StringRef::npos) { + if (!str.substr(leadingWs).empty()) { + print(str); + atStartOfLine = false; + } + break; + } + + auto split = + std::make_pair(str.slice(0, idx), str.slice(idx + 1, StringRef::npos)); + // Print empty new line without spaces if line only has spaces. + if (!split.first.ltrim().empty()) + print(split.first); + os << '\n'; + atStartOfLine = true; + str = split.second; + } +} diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index 46b9d81115c9b..df004adb1bed5 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -25,6 +25,7 @@ add_tablegen(mlir-tblgen MLIR set_target_properties(mlir-tblgen PROPERTIES FOLDER "Tablegenning") target_link_libraries(mlir-tblgen PRIVATE + MLIRSupportIdentedOstream MLIRTableGen) mlir_check_all_link_libraries(mlir-tblgen) diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index df78556c1c77b..ff6a290397630 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DocGenUtilities.h" +#include "mlir/Support/IndentedOstream.h" #include "mlir/TableGen/GenInfo.h" #include "mlir/TableGen/Operator.h" #include "llvm/ADT/DenseMap.h" @@ -35,39 +36,8 @@ using mlir::tblgen::Operator; // in a way the user wanted but has some additional indenting due to being // nested in the op definition. void mlir::tblgen::emitDescription(StringRef description, raw_ostream &os) { - // Determine the minimum number of spaces in a line. - size_t min_indent = -1; - StringRef remaining = description; - while (!remaining.empty()) { - auto split = remaining.split('\n'); - size_t indent = split.first.find_first_not_of(" \t"); - if (indent != StringRef::npos) - min_indent = std::min(indent, min_indent); - remaining = split.second; - } - - // Print out the description indented. - os << "\n"; - remaining = description; - bool printed = false; - while (!remaining.empty()) { - auto split = remaining.split('\n'); - if (split.second.empty()) { - // Skip last line with just spaces. - if (split.first.ltrim().empty()) - break; - } - // Print empty new line without spaces if line only has spaces, unless no - // text has been emitted before. - if (split.first.ltrim().empty()) { - if (printed) - os << "\n"; - } else { - os << split.first.substr(min_indent) << "\n"; - printed = true; - } - remaining = split.second; - } + raw_indented_ostream ros(os); + ros.reindent(description.rtrim(" \t")); } // Emits `str` with trailing newline if not empty. @@ -116,7 +86,7 @@ static void emitOpDoc(Operator op, raw_ostream &os) { // Emit the summary, syntax, and description if present. if (op.hasSummary()) - os << "\n" << op.getSummary() << "\n"; + os << "\n" << op.getSummary() << "\n\n"; if (op.hasAssemblyFormat()) emitAssemblyFormat(op.getOperationName(), op.getAssemblyFormat().trim(), os); @@ -228,7 +198,7 @@ static void emitDialectDoc(const RecordKeeper &recordKeeper, raw_ostream &os) { } os << "\n"; - for (auto dialectWithOps : dialectOps) + for (const auto &dialectWithOps : dialectOps) emitDialectDoc(dialectWithOps.first, dialectWithOps.second, dialectTypes[dialectWithOps.first], os); } diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index 9b2f35f566246..e16900227759d 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Support/IndentedOstream.h" #include "mlir/TableGen/Attribute.h" #include "mlir/TableGen/Format.h" #include "mlir/TableGen/GenInfo.h" @@ -77,11 +78,11 @@ class PatternEmitter { // Emits C++ statements for matching the `argIndex`-th argument of the given // DAG `tree` as an operand. - void emitOperandMatch(DagNode tree, int argIndex, int depth, int indent); + void emitOperandMatch(DagNode tree, int argIndex, int depth); // Emits C++ statements for matching the `argIndex`-th argument of the given // DAG `tree` as an attribute. - void emitAttributeMatch(DagNode tree, int argIndex, int depth, int indent); + void emitAttributeMatch(DagNode tree, int argIndex, int depth); // Emits C++ for checking a match with a corresponding match failure // diagnostic. @@ -184,7 +185,7 @@ class PatternEmitter { // The next unused ID for newly created values. unsigned nextValueId; - raw_ostream &os; + raw_indented_ostream os; // Format contexts containing placeholder substitutions. FmtContext fmtCtx; @@ -225,8 +226,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { // Skip the operand matching at depth 0 as the pattern rewriter already does. if (depth != 0) { // Skip if there is no defining operation (e.g., arguments to function). - os.indent(indent) << formatv("if (!castedOp{0}) return failure();\n", - depth); + os << formatv("if (!castedOp{0})\n return failure();\n", depth); } if (tree.getNumArgs() != op.getNumArgs()) { PrintFatalError(loc, formatv("op '{0}' argument number mismatch: {1} in " @@ -238,7 +238,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { // If the operand's name is set, set to that variable. auto name = tree.getSymbol(); if (!name.empty()) - os.indent(indent) << formatv("{0} = castedOp{1};\n", name, depth); + os << formatv("{0} = castedOp{1};\n", name, depth); for (int i = 0, e = tree.getNumArgs(); i != e; ++i) { auto opArg = op.getArg(i); @@ -253,24 +253,23 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { PrintFatalError(loc, error); } } - os.indent(indent) << "{\n"; + os << "{\n"; - os.indent(indent + 2) << formatv( + os.indent() << formatv( "auto *op{0} = " "(*castedOp{1}.getODSOperands({2}).begin()).getDefiningOp();\n", depth + 1, depth, i); emitOpMatch(argTree, depth + 1); - os.indent(indent + 2) - << formatv("tblgen_ops[{0}] = op{1};\n", ++opCounter, depth + 1); - os.indent(indent) << "}\n"; + os << formatv("tblgen_ops[{0}] = op{1};\n", ++opCounter, depth + 1); + os.unindent() << "}\n"; continue; } // Next handle DAG leaf: operand or attribute if (opArg.is()) { - emitOperandMatch(tree, i, depth, indent); + emitOperandMatch(tree, i, depth); } else if (opArg.is()) { - emitAttributeMatch(tree, i, depth, indent); + emitAttributeMatch(tree, i, depth); } else { PrintFatalError(loc, "unhandled case when matching op"); } @@ -280,8 +279,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { << '\n'); } -void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth, - int indent) { +void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth) { Operator &op = tree.getDialectOp(opMap); auto *operand = op.getArg(argIndex).get(); auto matcher = tree.getArgAsLeaf(argIndex); @@ -328,30 +326,28 @@ void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth, op.arg_begin(), op.arg_begin() + argIndex, [](const Argument &arg) { return arg.is(); }); - os.indent(indent) << formatv("{0} = castedOp{1}.getODSOperands({2});\n", - name, depth, argIndex - numPrevAttrs); + os << formatv("{0} = castedOp{1}.getODSOperands({2});\n", name, depth, + argIndex - numPrevAttrs); } } -void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth, - int indent) { +void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth) { Operator &op = tree.getDialectOp(opMap); auto *namedAttr = op.getArg(argIndex).get(); const auto &attr = namedAttr->attr; - os.indent(indent) << "{\n"; - indent += 2; - os.indent(indent) << formatv( - "auto tblgen_attr = op{0}->getAttrOfType<{1}>(\"{2}\");" + os << "{\n"; + os.indent() << formatv( + "auto tblgen_attr = op{0}->getAttrOfType<{1}>(\"{2}\"); " "(void)tblgen_attr;\n", depth, attr.getStorageType(), namedAttr->name); // TODO: This should use getter method to avoid duplication. if (attr.hasDefaultValue()) { - os.indent(indent) << "if (!tblgen_attr) tblgen_attr = " - << std::string(tgfmt(attr.getConstBuilderTemplate(), - &fmtCtx, attr.getDefaultValue())) - << ";\n"; + os << "if (!tblgen_attr) tblgen_attr = " + << std::string(tgfmt(attr.getConstBuilderTemplate(), &fmtCtx, + attr.getDefaultValue())) + << ";\n"; } else if (attr.isOptional()) { // For a missing attribute that is optional according to definition, we // should just capture a mlir::Attribute() to signal the missing state. @@ -387,27 +383,20 @@ void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth, auto name = tree.getArgName(argIndex); // `$_` is a special symbol to ignore op argument matching. if (!name.empty() && name != "_") { - os.indent(indent) << formatv("{0} = tblgen_attr;\n", name); + os << formatv("{0} = tblgen_attr;\n", name); } - indent -= 2; - os.indent(indent) << "}\n"; + os.unindent() << "}\n"; } void PatternEmitter::emitMatchCheck( int depth, const FmtObjectBase &matchFmt, const llvm::formatv_object_base &failureFmt) { - // {0} The match depth (used to get the operation that failed to match). - // {1} The format for the match string. - // {2} The format for the failure string. - const char *matchStr = R"( - if (!({1})) { - return rewriter.notifyMatchFailure(op{0}, [&](::mlir::Diagnostic &diag) { - diag << {2}; - }); - })"; - os << llvm::formatv(matchStr, depth, matchFmt.str(), failureFmt.str()) - << "\n"; + os << "if (!(" << matchFmt.str() << "))"; + os.scope("{\n", "\n}\n").os + << "return rewriter.notifyMatchFailure(op" << depth + << ", [&](::mlir::Diagnostic &diag) {\n diag << " << failureFmt.str() + << ";\n});"; } void PatternEmitter::emitMatchLogic(DagNode tree) { @@ -491,7 +480,7 @@ void PatternEmitter::emit(StringRef rewriteName) { // Emit RewritePattern for Pattern. auto locs = pattern.getLocation(); - os << formatv("/* Generated from:\n\t{0:$[ instantiating\n\t]}\n*/\n", + os << formatv("/* Generated from:\n {0:$[ instantiating\n ]}\n*/\n", make_range(locs.rbegin(), locs.rend())); os << formatv(R"(struct {0} : public ::mlir::RewritePattern { {0}(::mlir::MLIRContext *context) @@ -509,44 +498,48 @@ void PatternEmitter::emit(StringRef rewriteName) { os << formatv(R"(}, {0}, context) {{})", pattern.getBenefit()) << "\n"; // Emit matchAndRewrite() function. - os << R"( - ::mlir::LogicalResult - matchAndRewrite(::mlir::Operation *op0, - ::mlir::PatternRewriter &rewriter) const override { -)"; - - // Register all symbols bound in the source pattern. - pattern.collectSourcePatternBoundSymbols(symbolInfoMap); - - LLVM_DEBUG( - llvm::dbgs() << "start creating local variables for capturing matches\n"); - os.indent(4) << "// Variables for capturing values and attributes used for " - "creating ops\n"; - // Create local variables for storing the arguments and results bound - // to symbols. - for (const auto &symbolInfoPair : symbolInfoMap) { - StringRef symbol = symbolInfoPair.getKey(); - auto &info = symbolInfoPair.getValue(); - os.indent(4) << info.getVarDecl(symbol); + { + auto classScope = os.scope(); + os.reindent(R"( + ::mlir::LogicalResult matchAndRewrite(::mlir::Operation *op0, + ::mlir::PatternRewriter &rewriter) const override {)") + << '\n'; + { + auto functionScope = os.scope(); + + // Register all symbols bound in the source pattern. + pattern.collectSourcePatternBoundSymbols(symbolInfoMap); + + LLVM_DEBUG(llvm::dbgs() + << "start creating local variables for capturing matches\n"); + os << "// Variables for capturing values and attributes used while " + "creating ops\n"; + // Create local variables for storing the arguments and results bound + // to symbols. + for (const auto &symbolInfoPair : symbolInfoMap) { + StringRef symbol = symbolInfoPair.getKey(); + auto &info = symbolInfoPair.getValue(); + os << info.getVarDecl(symbol); + } + // TODO: capture ops with consistent numbering so that it can be + // reused for fused loc. + os << formatv("::mlir::Operation *tblgen_ops[{0}];\n\n", + pattern.getSourcePattern().getNumOps()); + LLVM_DEBUG(llvm::dbgs() + << "done creating local variables for capturing matches\n"); + + os << "// Match\n"; + os << "tblgen_ops[0] = op0;\n"; + emitMatchLogic(sourceTree); + + os << "\n// Rewrite\n"; + emitRewriteLogic(); + + os << "return success();\n"; + } + os << "};\n"; } - // TODO: capture ops with consistent numbering so that it can be - // reused for fused loc. - os.indent(4) << formatv("::mlir::Operation *tblgen_ops[{0}];\n\n", - pattern.getSourcePattern().getNumOps()); - LLVM_DEBUG( - llvm::dbgs() << "done creating local variables for capturing matches\n"); - - os.indent(4) << "// Match\n"; - os.indent(4) << "tblgen_ops[0] = op0;\n"; - emitMatchLogic(sourceTree); - os << "\n"; - - os.indent(4) << "// Rewrite\n"; - emitRewriteLogic(); - - os.indent(4) << "return success();\n"; - os << " };\n"; - os << "};\n"; + os << "};\n\n"; } void PatternEmitter::emitRewriteLogic() { @@ -586,7 +579,7 @@ void PatternEmitter::emitRewriteLogic() { PrintFatalError(loc, error); } - os.indent(4) << "auto odsLoc = rewriter.getFusedLoc({"; + os << "auto odsLoc = rewriter.getFusedLoc({"; for (int i = 0, e = pattern.getSourcePattern().getNumOps(); i != e; ++i) { os << (i ? ", " : "") << "tblgen_ops[" << i << "]->getLoc()"; } @@ -601,22 +594,21 @@ void PatternEmitter::emitRewriteLogic() { // we are handling auxiliary patterns so we want the side effect even if // NativeCodeCall is not replacing matched root op's results. if (resultTree.isNativeCodeCall()) - os.indent(4) << val << ";\n"; + os << val << ";\n"; } if (numExpectedResults == 0) { assert(replStartIndex >= numResultPatterns && "invalid auxiliary vs. replacement pattern division!"); // No result to replace. Just erase the op. - os.indent(4) << "rewriter.eraseOp(op0);\n"; + os << "rewriter.eraseOp(op0);\n"; } else { // Process replacement result patterns. - os.indent(4) - << "::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values;\n"; + os << "::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values;\n"; for (int i = replStartIndex; i < numResultPatterns; ++i) { DagNode resultTree = pattern.getResultPattern(i); auto val = handleResultPattern(resultTree, offsets[i], 0); - os.indent(4) << "\n"; + os << "\n"; // Resolve each symbol for all range use so that we can loop over them. // We need an explicit cast to `SmallVector` to capture the cases where // `{0}` resolves to an `Operation::result_range` as well as cases that @@ -625,12 +617,11 @@ void PatternEmitter::emitRewriteLogic() { // TODO: Revisit the need for materializing a vector. os << symbolInfoMap.getAllRangeUse( val, - " for (auto v : ::llvm::SmallVector<::mlir::Value, 4>{ {0} }) {{ " - "tblgen_repl_values.push_back(v); }", + "for (auto v: ::llvm::SmallVector<::mlir::Value, 4>{ {0} }) {{\n" + " tblgen_repl_values.push_back(v);\n}\n", "\n"); } - os.indent(4) << "\n"; - os.indent(4) << "rewriter.replaceOp(op0, tblgen_repl_values);\n"; + os << "\nrewriter.replaceOp(op0, tblgen_repl_values);\n"; } LLVM_DEBUG(llvm::dbgs() << "--- done emitting rewrite logic ---\n"); @@ -879,9 +870,8 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, } // Create the local variable for this op. - os.indent(4) << formatv("{0} {1};\n", resultOp.getQualCppClassName(), - valuePackName); - os.indent(4) << "{\n"; + os << formatv("{0} {1};\n{{\n", resultOp.getQualCppClassName(), + valuePackName); // Right now ODS don't have general type inference support. Except a few // special cases listed below, DRR needs to supply types for all results @@ -900,10 +890,9 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, createAggregateLocalVarsForOpArgs(tree, childNodeNames); // Then create the op. - os.indent(6) << formatv( - "{0} = rewriter.create<{1}>({2}, tblgen_values, tblgen_attrs);\n", + os.scope("", "\n}\n").os << formatv( + "{0} = rewriter.create<{1}>({2}, tblgen_values, tblgen_attrs);", valuePackName, resultOp.getQualCppClassName(), locToUse); - os.indent(4) << "}\n"; return resultValue; } @@ -920,11 +909,10 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // aggregate-parameter builders. createSeparateLocalVarsForOpArgs(tree, childNodeNames); - os.indent(6) << formatv("{0} = rewriter.create<{1}>({2}", valuePackName, - resultOp.getQualCppClassName(), locToUse); + os.scope().os << formatv("{0} = rewriter.create<{1}>({2}", valuePackName, + resultOp.getQualCppClassName(), locToUse); supplyValuesForOpArgs(tree, childNodeNames); - os << "\n );\n"; - os.indent(4) << "}\n"; + os << "\n );\n}\n"; return resultValue; } @@ -938,20 +926,19 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // Then prepare the result types. We need to specify the types for all // results. - os.indent(6) << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " - "(void)tblgen_types;\n"); + os.indent() << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " + "(void)tblgen_types;\n"); int numResults = resultOp.getNumResults(); if (numResults != 0) { for (int i = 0; i < numResults; ++i) - os.indent(6) << formatv("for (auto v : castedOp0.getODSResults({0})) {{" - "tblgen_types.push_back(v.getType()); }\n", - resultIndex + i); + os << formatv("for (auto v: castedOp0.getODSResults({0})) {{\n" + " tblgen_types.push_back(v.getType());\n}\n", + resultIndex + i); } - os.indent(6) << formatv("{0} = rewriter.create<{1}>({2}, tblgen_types, " - "tblgen_values, tblgen_attrs);\n", - valuePackName, resultOp.getQualCppClassName(), - locToUse); - os.indent(4) << "}\n"; + os << formatv("{0} = rewriter.create<{1}>({2}, tblgen_types, " + "tblgen_values, tblgen_attrs);\n", + valuePackName, resultOp.getQualCppClassName(), locToUse); + os.unindent() << "}\n"; return resultValue; } @@ -968,16 +955,15 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) { const auto *operand = resultOp.getArg(argIndex).dyn_cast(); - if (!operand) { - // We do not need special handling for attributes. + // We do not need special handling for attributes. + if (!operand) continue; - } + raw_indented_ostream::DelimitedScope scope(os); std::string varName; if (operand->isVariadic()) { varName = std::string(formatv("tblgen_values_{0}", valueIndex++)); - os.indent(6) << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", - varName); + os << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", varName); std::string range; if (node.isNestedDagArg(argIndex)) { range = childNodeNames[argIndex]; @@ -987,11 +973,11 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( // Resolve the symbol for all range use so that we have a uniform way of // capturing the values. range = symbolInfoMap.getValueAndRangeUse(range); - os.indent(6) << formatv("for (auto v : {0}) {1}.push_back(v);\n", range, - varName); + os << formatv("for (auto v: {0}) {{\n {1}.push_back(v);\n}\n", range, + varName); } else { varName = std::string(formatv("tblgen_value_{0}", valueIndex++)); - os.indent(6) << formatv("::mlir::Value {0} = ", varName); + os << formatv("::mlir::Value {0} = ", varName); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse(childNodeNames[argIndex]); } else { @@ -1019,7 +1005,7 @@ void PatternEmitter::supplyValuesForOpArgs( for (int argIndex = 0, numOpArgs = resultOp.getNumArgs(); argIndex != numOpArgs; ++argIndex) { // Start each argument on its own line. - (os << ",\n").indent(8); + os << ",\n "; Argument opArg = resultOp.getArg(argIndex); // Handle the case of operand first. @@ -1060,14 +1046,16 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( DagNode node, const ChildNodeIndexNameMap &childNodeNames) { Operator &resultOp = node.getDialectOp(opMap); - os.indent(6) << formatv("::mlir::SmallVector<::mlir::Value, 4> " - "tblgen_values; (void)tblgen_values;\n"); - os.indent(6) << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " - "tblgen_attrs; (void)tblgen_attrs;\n"); + auto scope = os.scope(); + os << formatv("::mlir::SmallVector<::mlir::Value, 4> " + "tblgen_values; (void)tblgen_values;\n"); + os << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " + "tblgen_attrs; (void)tblgen_attrs;\n"); const char *addAttrCmd = - "if (auto tmpAttr = {1}) " - "tblgen_attrs.emplace_back(rewriter.getIdentifier(\"{0}\"), tmpAttr);\n"; + "if (auto tmpAttr = {1}) {\n" + " tblgen_attrs.emplace_back(rewriter.getIdentifier(\"{0}\"), " + "tmpAttr);\n}\n"; for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) { if (resultOp.getArg(argIndex).is()) { // The argument in the op definition. @@ -1076,14 +1064,14 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( if (!subTree.isNativeCodeCall()) PrintFatalError(loc, "only NativeCodeCall allowed in nested dag node " "for creating attribute"); - os.indent(6) << formatv(addAttrCmd, opArgName, - handleReplaceWithNativeCodeCall(subTree)); + os << formatv(addAttrCmd, opArgName, + handleReplaceWithNativeCodeCall(subTree)); } else { auto leaf = node.getArgAsLeaf(argIndex); // The argument in the result DAG pattern. auto patArgName = node.getArgName(argIndex); - os.indent(6) << formatv(addAttrCmd, opArgName, - handleOpArgument(leaf, patArgName)); + os << formatv(addAttrCmd, opArgName, + handleOpArgument(leaf, patArgName)); } continue; } @@ -1101,10 +1089,10 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( // Resolve the symbol for all range use so that we have a uniform way of // capturing the values. range = symbolInfoMap.getValueAndRangeUse(range); - os.indent(6) << formatv( - "for (auto v : {0}) tblgen_values.push_back(v);\n", range); + os << formatv("for (auto v: {0}) {{\n tblgen_values.push_back(v);\n}\n", + range); } else { - os.indent(6) << formatv("tblgen_values.push_back(", varName); + os << formatv("tblgen_values.push_back(", varName); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse( childNodeNames.lookup(argIndex)); diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt new file mode 100644 index 0000000000000..42a1c21261c4d --- /dev/null +++ b/mlir/unittests/Support/CMakeLists.txt @@ -0,0 +1,6 @@ +add_mlir_unittest(MLIRSupportTests + IndentedOstreamTest.cpp +) + +target_link_libraries(MLIRSupportTests + PRIVATE MLIRSupportIdentedOstream MLIRSupport) diff --git a/mlir/unittests/Support/IndentedOstreamTest.cpp b/mlir/unittests/Support/IndentedOstreamTest.cpp new file mode 100644 index 0000000000000..0271eb73e8897 --- /dev/null +++ b/mlir/unittests/Support/IndentedOstreamTest.cpp @@ -0,0 +1,110 @@ +//===- IndentedOstreamTest.cpp - Indented raw ostream Tests ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/IndentedOstream.h" +#include "gmock/gmock.h" + +using namespace mlir; +using ::testing::StrEq; + +TEST(FormatTest, SingleLine) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros << 10; + ros.flush(); + EXPECT_THAT(os.str(), StrEq("10")); +} + +TEST(FormatTest, SimpleMultiLine) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros << "a"; + ros << "b"; + ros << "\n"; + ros << "c"; + ros << "\n"; + ros.flush(); + EXPECT_THAT(os.str(), StrEq("ab\nc\n")); +} + +TEST(FormatTest, SimpleMultiLineIndent) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros.indent(2) << "a"; + ros.indent(4) << "b"; + ros << "\n"; + ros << "c"; + ros << "\n"; + ros.flush(); + EXPECT_THAT(os.str(), StrEq(" a b\n c\n")); +} + +TEST(FormatTest, SingleRegion) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros << "before\n"; + { + raw_indented_ostream::DelimitedScope scope(ros); + ros << "inside " << 10; + ros << "\n two\n"; + { + raw_indented_ostream::DelimitedScope scope(ros, "{\n", "\n}\n"); + ros << "inner inner"; + } + } + ros << "after"; + ros.flush(); + const auto *expected = + R"(before + inside 10 + two + { + inner inner + } +after)"; + EXPECT_THAT(os.str(), StrEq(expected)); + + // Repeat the above with inline form. + str.clear(); + ros << "before\n"; + ros.scope().os << "inside " << 10 << "\n two\n"; + ros.scope().os.scope("{\n", "\n}\n").os << "inner inner"; + ros << "after"; + ros.flush(); + EXPECT_THAT(os.str(), StrEq(expected)); +} + +TEST(FormatTest, Reindent) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + + // String to print with some additional empty lines at the start and lines + // with just spaces. + const auto *desc = R"( + + + First line + second line + + + )"; + ros.reindent(desc); + ros.flush(); + const auto *expected = + R"(First line + second line + + +)"; + EXPECT_THAT(os.str(), StrEq(expected)); +} From b82a7486d108a708f00c00feed784f34711300db Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 3 Oct 2020 17:10:44 +0100 Subject: [PATCH 463/544] [InstCombine] Add or(shl(v,and(x,bw-1)),lshr(v,bw-and(x,bw-1))) rotate tests If we know the shift amount is less than the bitwidth we should be able to convert this to a rotate/funnel shift --- llvm/test/Transforms/InstCombine/rotate.ll | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll index 514c1d6cf7d84..d08fe07784224 100644 --- a/llvm/test/Transforms/InstCombine/rotate.ll +++ b/llvm/test/Transforms/InstCombine/rotate.ll @@ -675,6 +675,61 @@ define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) { ret i9 %ret } +; Fold or(shl(v,x),lshr(v,bw-x)) iff x < bw + +define i64 @rotl_sub_mask(i64 %0, i64 %1) { +; CHECK-LABEL: @rotl_sub_mask( +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63 +; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0:%.*]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP0]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]] +; CHECK-NEXT: ret i64 [[TMP7]] +; + %3 = and i64 %1, 63 + %4 = shl i64 %0, %3 + %5 = sub nuw nsw i64 64, %3 + %6 = lshr i64 %0, %5 + %7 = or i64 %6, %4 + ret i64 %7 +} + +; Fold or(lshr(v,x),shl(v,bw-x)) iff x < bw + +define i64 @rotr_sub_mask(i64 %0, i64 %1) { +; CHECK-LABEL: @rotr_sub_mask( +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0:%.*]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP0]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]] +; CHECK-NEXT: ret i64 [[TMP7]] +; + %3 = and i64 %1, 63 + %4 = lshr i64 %0, %3 + %5 = sub nuw nsw i64 64, %3 + %6 = shl i64 %0, %5 + %7 = or i64 %6, %4 + ret i64 %7 +} + +define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) { +; CHECK-LABEL: @rotr_sub_mask_vector( +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP1:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP0:%.*]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <2 x i64> , [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = shl <2 x i64> [[TMP0]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP6]], [[TMP4]] +; CHECK-NEXT: ret <2 x i64> [[TMP7]] +; + %3 = and <2 x i64> %1, + %4 = lshr <2 x i64> %0, %3 + %5 = sub nuw nsw <2 x i64> , %3 + %6 = shl <2 x i64> %0, %5 + %7 = or <2 x i64> %6, %4 + ret <2 x i64> %7 +} + ; Convert select pattern to masked shift that ends in 'or'. define i32 @rotr_select(i32 %x, i32 %shamt) { From be185b6a7355fdfeb1c31df2e1272366fe58b01f Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Sat, 3 Oct 2020 09:41:35 -0700 Subject: [PATCH 464/544] Revert "Add indented raw_ostream class" This reverts commit 78530ce65375fa02bc96019e5cc9d73db8adaca4. Fails on shared_lib build. --- mlir/include/mlir/Support/IndentedOstream.h | 102 ------- mlir/lib/Support/CMakeLists.txt | 8 - mlir/lib/Support/IndentedOstream.cpp | 65 ----- mlir/tools/mlir-tblgen/CMakeLists.txt | 1 - mlir/tools/mlir-tblgen/OpDocGen.cpp | 40 ++- mlir/tools/mlir-tblgen/RewriterGen.cpp | 250 +++++++++--------- mlir/unittests/Support/CMakeLists.txt | 6 - .../unittests/Support/IndentedOstreamTest.cpp | 110 -------- 8 files changed, 166 insertions(+), 416 deletions(-) delete mode 100644 mlir/include/mlir/Support/IndentedOstream.h delete mode 100644 mlir/lib/Support/IndentedOstream.cpp delete mode 100644 mlir/unittests/Support/CMakeLists.txt delete mode 100644 mlir/unittests/Support/IndentedOstreamTest.cpp diff --git a/mlir/include/mlir/Support/IndentedOstream.h b/mlir/include/mlir/Support/IndentedOstream.h deleted file mode 100644 index 20161c1f3898f..0000000000000 --- a/mlir/include/mlir/Support/IndentedOstream.h +++ /dev/null @@ -1,102 +0,0 @@ -//===- IndentedOstream.h - raw ostream wrapper to indent --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// raw_ostream subclass that keeps track of indentation for textual output -// where indentation helps readability. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_SUPPORT_INDENTEDOSTREAM_H_ -#define MLIR_SUPPORT_INDENTEDOSTREAM_H_ - -#include "mlir/Support/LLVM.h" -#include "llvm/Support/raw_ostream.h" - -namespace mlir { - -/// raw_ostream subclass that simplifies indention a sequence of code. -class raw_indented_ostream : public raw_ostream { -public: - explicit raw_indented_ostream(llvm::raw_ostream &os) : os(os) { - SetUnbuffered(); - } - - /// Simple RAII struct to use to indentation around entering/exiting region. - struct DelimitedScope { - explicit DelimitedScope(raw_indented_ostream &os, StringRef open = "", - StringRef close = "") - : os(os), open(open), close(close) { - os << open; - os.indent(); - } - ~DelimitedScope() { - os.unindent(); - os << close; - } - - raw_indented_ostream &os; - - private: - llvm::StringRef open, close; - }; - - /// Returns DelimitedScope. - DelimitedScope scope(StringRef open = "", StringRef close = "") { - return DelimitedScope(*this, open, close); - } - - /// Re-indents by removing the leading whitespace from the first non-empty - /// line from every line of the the string, skipping over empty lines at the - /// start. - raw_indented_ostream &reindent(StringRef str); - - /// Increases the indent and returning this raw_indented_ostream. - raw_indented_ostream &indent() { - currentIndent += indentSize; - return *this; - } - - /// Decreases the indent and returning this raw_indented_ostream. - raw_indented_ostream &unindent() { - currentIndent = std::max(0, currentIndent - indentSize); - return *this; - } - - /// Emits whitespace and sets the indendation for the stream. - raw_indented_ostream &indent(int with) { - os.indent(with); - atStartOfLine = false; - currentIndent = with; - return *this; - } - -private: - void write_impl(const char *ptr, size_t size) override; - - /// Return the current position within the stream, not counting the bytes - /// currently in the buffer. - uint64_t current_pos() const override { return os.tell(); } - - /// Constant indent added/removed. - static constexpr int indentSize = 2; - - // Tracker for current indentation. - int currentIndent = 0; - - // The leading whitespace of the string being printed, if reindent is used. - int leadingWs = 0; - - // Tracks whether at start of line and so indent is required or not. - bool atStartOfLine = true; - - // The underlying raw_ostream. - raw_ostream &os; -}; - -} // namespace mlir -#endif // MLIR_SUPPORT_INDENTEDOSTREAM_H_ diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt index 16584e0821099..bdba990571721 100644 --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -1,6 +1,5 @@ set(LLVM_OPTIONAL_SOURCES FileUtilities.cpp - IndentedOstream.cpp MlirOptMain.cpp StorageUniquer.cpp ToolUtilities.cpp @@ -28,10 +27,3 @@ add_mlir_library(MLIROptLib MLIRParser MLIRSupport ) - -# This doesn't use add_mlir_library as it is used in mlir-tblgen and else -# mlir-tblgen ends up depending on mlir-generic-headers. -add_llvm_library(MLIRSupportIdentedOstream - IndentedOstream.cpp - - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Support) diff --git a/mlir/lib/Support/IndentedOstream.cpp b/mlir/lib/Support/IndentedOstream.cpp deleted file mode 100644 index bb3feef6c4458..0000000000000 --- a/mlir/lib/Support/IndentedOstream.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===- IndentedOstream.cpp - raw ostream wrapper to indent ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// raw_ostream subclass that keeps track of indentation for textual output -// where indentation helps readability. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Support/IndentedOstream.h" - -using namespace mlir; - -raw_indented_ostream &mlir::raw_indented_ostream::reindent(StringRef str) { - StringRef remaining = str; - // Find leading whitespace indent. - while (!remaining.empty()) { - auto split = remaining.split('\n'); - size_t indent = split.first.find_first_not_of(" \t"); - if (indent != StringRef::npos) { - leadingWs = indent; - break; - } - remaining = split.second; - } - // Print, skipping the empty lines. - *this << remaining; - leadingWs = 0; - return *this; -} - -void mlir::raw_indented_ostream::write_impl(const char *ptr, size_t size) { - StringRef str(ptr, size); - // Print out indented. - auto print = [this](StringRef str) { - if (atStartOfLine) - os.indent(currentIndent) << str.substr(leadingWs); - else - os << str.substr(leadingWs); - }; - - while (!str.empty()) { - size_t idx = str.find('\n'); - if (idx == StringRef::npos) { - if (!str.substr(leadingWs).empty()) { - print(str); - atStartOfLine = false; - } - break; - } - - auto split = - std::make_pair(str.slice(0, idx), str.slice(idx + 1, StringRef::npos)); - // Print empty new line without spaces if line only has spaces. - if (!split.first.ltrim().empty()) - print(split.first); - os << '\n'; - atStartOfLine = true; - str = split.second; - } -} diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index df004adb1bed5..46b9d81115c9b 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -25,7 +25,6 @@ add_tablegen(mlir-tblgen MLIR set_target_properties(mlir-tblgen PROPERTIES FOLDER "Tablegenning") target_link_libraries(mlir-tblgen PRIVATE - MLIRSupportIdentedOstream MLIRTableGen) mlir_check_all_link_libraries(mlir-tblgen) diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index ff6a290397630..df78556c1c77b 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "DocGenUtilities.h" -#include "mlir/Support/IndentedOstream.h" #include "mlir/TableGen/GenInfo.h" #include "mlir/TableGen/Operator.h" #include "llvm/ADT/DenseMap.h" @@ -36,8 +35,39 @@ using mlir::tblgen::Operator; // in a way the user wanted but has some additional indenting due to being // nested in the op definition. void mlir::tblgen::emitDescription(StringRef description, raw_ostream &os) { - raw_indented_ostream ros(os); - ros.reindent(description.rtrim(" \t")); + // Determine the minimum number of spaces in a line. + size_t min_indent = -1; + StringRef remaining = description; + while (!remaining.empty()) { + auto split = remaining.split('\n'); + size_t indent = split.first.find_first_not_of(" \t"); + if (indent != StringRef::npos) + min_indent = std::min(indent, min_indent); + remaining = split.second; + } + + // Print out the description indented. + os << "\n"; + remaining = description; + bool printed = false; + while (!remaining.empty()) { + auto split = remaining.split('\n'); + if (split.second.empty()) { + // Skip last line with just spaces. + if (split.first.ltrim().empty()) + break; + } + // Print empty new line without spaces if line only has spaces, unless no + // text has been emitted before. + if (split.first.ltrim().empty()) { + if (printed) + os << "\n"; + } else { + os << split.first.substr(min_indent) << "\n"; + printed = true; + } + remaining = split.second; + } } // Emits `str` with trailing newline if not empty. @@ -86,7 +116,7 @@ static void emitOpDoc(Operator op, raw_ostream &os) { // Emit the summary, syntax, and description if present. if (op.hasSummary()) - os << "\n" << op.getSummary() << "\n\n"; + os << "\n" << op.getSummary() << "\n"; if (op.hasAssemblyFormat()) emitAssemblyFormat(op.getOperationName(), op.getAssemblyFormat().trim(), os); @@ -198,7 +228,7 @@ static void emitDialectDoc(const RecordKeeper &recordKeeper, raw_ostream &os) { } os << "\n"; - for (const auto &dialectWithOps : dialectOps) + for (auto dialectWithOps : dialectOps) emitDialectDoc(dialectWithOps.first, dialectWithOps.second, dialectTypes[dialectWithOps.first], os); } diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index e16900227759d..9b2f35f566246 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Support/IndentedOstream.h" #include "mlir/TableGen/Attribute.h" #include "mlir/TableGen/Format.h" #include "mlir/TableGen/GenInfo.h" @@ -78,11 +77,11 @@ class PatternEmitter { // Emits C++ statements for matching the `argIndex`-th argument of the given // DAG `tree` as an operand. - void emitOperandMatch(DagNode tree, int argIndex, int depth); + void emitOperandMatch(DagNode tree, int argIndex, int depth, int indent); // Emits C++ statements for matching the `argIndex`-th argument of the given // DAG `tree` as an attribute. - void emitAttributeMatch(DagNode tree, int argIndex, int depth); + void emitAttributeMatch(DagNode tree, int argIndex, int depth, int indent); // Emits C++ for checking a match with a corresponding match failure // diagnostic. @@ -185,7 +184,7 @@ class PatternEmitter { // The next unused ID for newly created values. unsigned nextValueId; - raw_indented_ostream os; + raw_ostream &os; // Format contexts containing placeholder substitutions. FmtContext fmtCtx; @@ -226,7 +225,8 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { // Skip the operand matching at depth 0 as the pattern rewriter already does. if (depth != 0) { // Skip if there is no defining operation (e.g., arguments to function). - os << formatv("if (!castedOp{0})\n return failure();\n", depth); + os.indent(indent) << formatv("if (!castedOp{0}) return failure();\n", + depth); } if (tree.getNumArgs() != op.getNumArgs()) { PrintFatalError(loc, formatv("op '{0}' argument number mismatch: {1} in " @@ -238,7 +238,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { // If the operand's name is set, set to that variable. auto name = tree.getSymbol(); if (!name.empty()) - os << formatv("{0} = castedOp{1};\n", name, depth); + os.indent(indent) << formatv("{0} = castedOp{1};\n", name, depth); for (int i = 0, e = tree.getNumArgs(); i != e; ++i) { auto opArg = op.getArg(i); @@ -253,23 +253,24 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { PrintFatalError(loc, error); } } - os << "{\n"; + os.indent(indent) << "{\n"; - os.indent() << formatv( + os.indent(indent + 2) << formatv( "auto *op{0} = " "(*castedOp{1}.getODSOperands({2}).begin()).getDefiningOp();\n", depth + 1, depth, i); emitOpMatch(argTree, depth + 1); - os << formatv("tblgen_ops[{0}] = op{1};\n", ++opCounter, depth + 1); - os.unindent() << "}\n"; + os.indent(indent + 2) + << formatv("tblgen_ops[{0}] = op{1};\n", ++opCounter, depth + 1); + os.indent(indent) << "}\n"; continue; } // Next handle DAG leaf: operand or attribute if (opArg.is()) { - emitOperandMatch(tree, i, depth); + emitOperandMatch(tree, i, depth, indent); } else if (opArg.is()) { - emitAttributeMatch(tree, i, depth); + emitAttributeMatch(tree, i, depth, indent); } else { PrintFatalError(loc, "unhandled case when matching op"); } @@ -279,7 +280,8 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { << '\n'); } -void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth) { +void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth, + int indent) { Operator &op = tree.getDialectOp(opMap); auto *operand = op.getArg(argIndex).get(); auto matcher = tree.getArgAsLeaf(argIndex); @@ -326,28 +328,30 @@ void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth) { op.arg_begin(), op.arg_begin() + argIndex, [](const Argument &arg) { return arg.is(); }); - os << formatv("{0} = castedOp{1}.getODSOperands({2});\n", name, depth, - argIndex - numPrevAttrs); + os.indent(indent) << formatv("{0} = castedOp{1}.getODSOperands({2});\n", + name, depth, argIndex - numPrevAttrs); } } -void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth) { +void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth, + int indent) { Operator &op = tree.getDialectOp(opMap); auto *namedAttr = op.getArg(argIndex).get(); const auto &attr = namedAttr->attr; - os << "{\n"; - os.indent() << formatv( - "auto tblgen_attr = op{0}->getAttrOfType<{1}>(\"{2}\"); " + os.indent(indent) << "{\n"; + indent += 2; + os.indent(indent) << formatv( + "auto tblgen_attr = op{0}->getAttrOfType<{1}>(\"{2}\");" "(void)tblgen_attr;\n", depth, attr.getStorageType(), namedAttr->name); // TODO: This should use getter method to avoid duplication. if (attr.hasDefaultValue()) { - os << "if (!tblgen_attr) tblgen_attr = " - << std::string(tgfmt(attr.getConstBuilderTemplate(), &fmtCtx, - attr.getDefaultValue())) - << ";\n"; + os.indent(indent) << "if (!tblgen_attr) tblgen_attr = " + << std::string(tgfmt(attr.getConstBuilderTemplate(), + &fmtCtx, attr.getDefaultValue())) + << ";\n"; } else if (attr.isOptional()) { // For a missing attribute that is optional according to definition, we // should just capture a mlir::Attribute() to signal the missing state. @@ -383,20 +387,27 @@ void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth) { auto name = tree.getArgName(argIndex); // `$_` is a special symbol to ignore op argument matching. if (!name.empty() && name != "_") { - os << formatv("{0} = tblgen_attr;\n", name); + os.indent(indent) << formatv("{0} = tblgen_attr;\n", name); } - os.unindent() << "}\n"; + indent -= 2; + os.indent(indent) << "}\n"; } void PatternEmitter::emitMatchCheck( int depth, const FmtObjectBase &matchFmt, const llvm::formatv_object_base &failureFmt) { - os << "if (!(" << matchFmt.str() << "))"; - os.scope("{\n", "\n}\n").os - << "return rewriter.notifyMatchFailure(op" << depth - << ", [&](::mlir::Diagnostic &diag) {\n diag << " << failureFmt.str() - << ";\n});"; + // {0} The match depth (used to get the operation that failed to match). + // {1} The format for the match string. + // {2} The format for the failure string. + const char *matchStr = R"( + if (!({1})) { + return rewriter.notifyMatchFailure(op{0}, [&](::mlir::Diagnostic &diag) { + diag << {2}; + }); + })"; + os << llvm::formatv(matchStr, depth, matchFmt.str(), failureFmt.str()) + << "\n"; } void PatternEmitter::emitMatchLogic(DagNode tree) { @@ -480,7 +491,7 @@ void PatternEmitter::emit(StringRef rewriteName) { // Emit RewritePattern for Pattern. auto locs = pattern.getLocation(); - os << formatv("/* Generated from:\n {0:$[ instantiating\n ]}\n*/\n", + os << formatv("/* Generated from:\n\t{0:$[ instantiating\n\t]}\n*/\n", make_range(locs.rbegin(), locs.rend())); os << formatv(R"(struct {0} : public ::mlir::RewritePattern { {0}(::mlir::MLIRContext *context) @@ -498,48 +509,44 @@ void PatternEmitter::emit(StringRef rewriteName) { os << formatv(R"(}, {0}, context) {{})", pattern.getBenefit()) << "\n"; // Emit matchAndRewrite() function. - { - auto classScope = os.scope(); - os.reindent(R"( - ::mlir::LogicalResult matchAndRewrite(::mlir::Operation *op0, - ::mlir::PatternRewriter &rewriter) const override {)") - << '\n'; - { - auto functionScope = os.scope(); - - // Register all symbols bound in the source pattern. - pattern.collectSourcePatternBoundSymbols(symbolInfoMap); - - LLVM_DEBUG(llvm::dbgs() - << "start creating local variables for capturing matches\n"); - os << "// Variables for capturing values and attributes used while " - "creating ops\n"; - // Create local variables for storing the arguments and results bound - // to symbols. - for (const auto &symbolInfoPair : symbolInfoMap) { - StringRef symbol = symbolInfoPair.getKey(); - auto &info = symbolInfoPair.getValue(); - os << info.getVarDecl(symbol); - } - // TODO: capture ops with consistent numbering so that it can be - // reused for fused loc. - os << formatv("::mlir::Operation *tblgen_ops[{0}];\n\n", - pattern.getSourcePattern().getNumOps()); - LLVM_DEBUG(llvm::dbgs() - << "done creating local variables for capturing matches\n"); - - os << "// Match\n"; - os << "tblgen_ops[0] = op0;\n"; - emitMatchLogic(sourceTree); - - os << "\n// Rewrite\n"; - emitRewriteLogic(); - - os << "return success();\n"; - } - os << "};\n"; + os << R"( + ::mlir::LogicalResult + matchAndRewrite(::mlir::Operation *op0, + ::mlir::PatternRewriter &rewriter) const override { +)"; + + // Register all symbols bound in the source pattern. + pattern.collectSourcePatternBoundSymbols(symbolInfoMap); + + LLVM_DEBUG( + llvm::dbgs() << "start creating local variables for capturing matches\n"); + os.indent(4) << "// Variables for capturing values and attributes used for " + "creating ops\n"; + // Create local variables for storing the arguments and results bound + // to symbols. + for (const auto &symbolInfoPair : symbolInfoMap) { + StringRef symbol = symbolInfoPair.getKey(); + auto &info = symbolInfoPair.getValue(); + os.indent(4) << info.getVarDecl(symbol); } - os << "};\n\n"; + // TODO: capture ops with consistent numbering so that it can be + // reused for fused loc. + os.indent(4) << formatv("::mlir::Operation *tblgen_ops[{0}];\n\n", + pattern.getSourcePattern().getNumOps()); + LLVM_DEBUG( + llvm::dbgs() << "done creating local variables for capturing matches\n"); + + os.indent(4) << "// Match\n"; + os.indent(4) << "tblgen_ops[0] = op0;\n"; + emitMatchLogic(sourceTree); + os << "\n"; + + os.indent(4) << "// Rewrite\n"; + emitRewriteLogic(); + + os.indent(4) << "return success();\n"; + os << " };\n"; + os << "};\n"; } void PatternEmitter::emitRewriteLogic() { @@ -579,7 +586,7 @@ void PatternEmitter::emitRewriteLogic() { PrintFatalError(loc, error); } - os << "auto odsLoc = rewriter.getFusedLoc({"; + os.indent(4) << "auto odsLoc = rewriter.getFusedLoc({"; for (int i = 0, e = pattern.getSourcePattern().getNumOps(); i != e; ++i) { os << (i ? ", " : "") << "tblgen_ops[" << i << "]->getLoc()"; } @@ -594,21 +601,22 @@ void PatternEmitter::emitRewriteLogic() { // we are handling auxiliary patterns so we want the side effect even if // NativeCodeCall is not replacing matched root op's results. if (resultTree.isNativeCodeCall()) - os << val << ";\n"; + os.indent(4) << val << ";\n"; } if (numExpectedResults == 0) { assert(replStartIndex >= numResultPatterns && "invalid auxiliary vs. replacement pattern division!"); // No result to replace. Just erase the op. - os << "rewriter.eraseOp(op0);\n"; + os.indent(4) << "rewriter.eraseOp(op0);\n"; } else { // Process replacement result patterns. - os << "::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values;\n"; + os.indent(4) + << "::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values;\n"; for (int i = replStartIndex; i < numResultPatterns; ++i) { DagNode resultTree = pattern.getResultPattern(i); auto val = handleResultPattern(resultTree, offsets[i], 0); - os << "\n"; + os.indent(4) << "\n"; // Resolve each symbol for all range use so that we can loop over them. // We need an explicit cast to `SmallVector` to capture the cases where // `{0}` resolves to an `Operation::result_range` as well as cases that @@ -617,11 +625,12 @@ void PatternEmitter::emitRewriteLogic() { // TODO: Revisit the need for materializing a vector. os << symbolInfoMap.getAllRangeUse( val, - "for (auto v: ::llvm::SmallVector<::mlir::Value, 4>{ {0} }) {{\n" - " tblgen_repl_values.push_back(v);\n}\n", + " for (auto v : ::llvm::SmallVector<::mlir::Value, 4>{ {0} }) {{ " + "tblgen_repl_values.push_back(v); }", "\n"); } - os << "\nrewriter.replaceOp(op0, tblgen_repl_values);\n"; + os.indent(4) << "\n"; + os.indent(4) << "rewriter.replaceOp(op0, tblgen_repl_values);\n"; } LLVM_DEBUG(llvm::dbgs() << "--- done emitting rewrite logic ---\n"); @@ -870,8 +879,9 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, } // Create the local variable for this op. - os << formatv("{0} {1};\n{{\n", resultOp.getQualCppClassName(), - valuePackName); + os.indent(4) << formatv("{0} {1};\n", resultOp.getQualCppClassName(), + valuePackName); + os.indent(4) << "{\n"; // Right now ODS don't have general type inference support. Except a few // special cases listed below, DRR needs to supply types for all results @@ -890,9 +900,10 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, createAggregateLocalVarsForOpArgs(tree, childNodeNames); // Then create the op. - os.scope("", "\n}\n").os << formatv( - "{0} = rewriter.create<{1}>({2}, tblgen_values, tblgen_attrs);", + os.indent(6) << formatv( + "{0} = rewriter.create<{1}>({2}, tblgen_values, tblgen_attrs);\n", valuePackName, resultOp.getQualCppClassName(), locToUse); + os.indent(4) << "}\n"; return resultValue; } @@ -909,10 +920,11 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // aggregate-parameter builders. createSeparateLocalVarsForOpArgs(tree, childNodeNames); - os.scope().os << formatv("{0} = rewriter.create<{1}>({2}", valuePackName, - resultOp.getQualCppClassName(), locToUse); + os.indent(6) << formatv("{0} = rewriter.create<{1}>({2}", valuePackName, + resultOp.getQualCppClassName(), locToUse); supplyValuesForOpArgs(tree, childNodeNames); - os << "\n );\n}\n"; + os << "\n );\n"; + os.indent(4) << "}\n"; return resultValue; } @@ -926,19 +938,20 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // Then prepare the result types. We need to specify the types for all // results. - os.indent() << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " - "(void)tblgen_types;\n"); + os.indent(6) << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " + "(void)tblgen_types;\n"); int numResults = resultOp.getNumResults(); if (numResults != 0) { for (int i = 0; i < numResults; ++i) - os << formatv("for (auto v: castedOp0.getODSResults({0})) {{\n" - " tblgen_types.push_back(v.getType());\n}\n", - resultIndex + i); + os.indent(6) << formatv("for (auto v : castedOp0.getODSResults({0})) {{" + "tblgen_types.push_back(v.getType()); }\n", + resultIndex + i); } - os << formatv("{0} = rewriter.create<{1}>({2}, tblgen_types, " - "tblgen_values, tblgen_attrs);\n", - valuePackName, resultOp.getQualCppClassName(), locToUse); - os.unindent() << "}\n"; + os.indent(6) << formatv("{0} = rewriter.create<{1}>({2}, tblgen_types, " + "tblgen_values, tblgen_attrs);\n", + valuePackName, resultOp.getQualCppClassName(), + locToUse); + os.indent(4) << "}\n"; return resultValue; } @@ -955,15 +968,16 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) { const auto *operand = resultOp.getArg(argIndex).dyn_cast(); - // We do not need special handling for attributes. - if (!operand) + if (!operand) { + // We do not need special handling for attributes. continue; + } - raw_indented_ostream::DelimitedScope scope(os); std::string varName; if (operand->isVariadic()) { varName = std::string(formatv("tblgen_values_{0}", valueIndex++)); - os << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", varName); + os.indent(6) << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", + varName); std::string range; if (node.isNestedDagArg(argIndex)) { range = childNodeNames[argIndex]; @@ -973,11 +987,11 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( // Resolve the symbol for all range use so that we have a uniform way of // capturing the values. range = symbolInfoMap.getValueAndRangeUse(range); - os << formatv("for (auto v: {0}) {{\n {1}.push_back(v);\n}\n", range, - varName); + os.indent(6) << formatv("for (auto v : {0}) {1}.push_back(v);\n", range, + varName); } else { varName = std::string(formatv("tblgen_value_{0}", valueIndex++)); - os << formatv("::mlir::Value {0} = ", varName); + os.indent(6) << formatv("::mlir::Value {0} = ", varName); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse(childNodeNames[argIndex]); } else { @@ -1005,7 +1019,7 @@ void PatternEmitter::supplyValuesForOpArgs( for (int argIndex = 0, numOpArgs = resultOp.getNumArgs(); argIndex != numOpArgs; ++argIndex) { // Start each argument on its own line. - os << ",\n "; + (os << ",\n").indent(8); Argument opArg = resultOp.getArg(argIndex); // Handle the case of operand first. @@ -1046,16 +1060,14 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( DagNode node, const ChildNodeIndexNameMap &childNodeNames) { Operator &resultOp = node.getDialectOp(opMap); - auto scope = os.scope(); - os << formatv("::mlir::SmallVector<::mlir::Value, 4> " - "tblgen_values; (void)tblgen_values;\n"); - os << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " - "tblgen_attrs; (void)tblgen_attrs;\n"); + os.indent(6) << formatv("::mlir::SmallVector<::mlir::Value, 4> " + "tblgen_values; (void)tblgen_values;\n"); + os.indent(6) << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " + "tblgen_attrs; (void)tblgen_attrs;\n"); const char *addAttrCmd = - "if (auto tmpAttr = {1}) {\n" - " tblgen_attrs.emplace_back(rewriter.getIdentifier(\"{0}\"), " - "tmpAttr);\n}\n"; + "if (auto tmpAttr = {1}) " + "tblgen_attrs.emplace_back(rewriter.getIdentifier(\"{0}\"), tmpAttr);\n"; for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) { if (resultOp.getArg(argIndex).is()) { // The argument in the op definition. @@ -1064,14 +1076,14 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( if (!subTree.isNativeCodeCall()) PrintFatalError(loc, "only NativeCodeCall allowed in nested dag node " "for creating attribute"); - os << formatv(addAttrCmd, opArgName, - handleReplaceWithNativeCodeCall(subTree)); + os.indent(6) << formatv(addAttrCmd, opArgName, + handleReplaceWithNativeCodeCall(subTree)); } else { auto leaf = node.getArgAsLeaf(argIndex); // The argument in the result DAG pattern. auto patArgName = node.getArgName(argIndex); - os << formatv(addAttrCmd, opArgName, - handleOpArgument(leaf, patArgName)); + os.indent(6) << formatv(addAttrCmd, opArgName, + handleOpArgument(leaf, patArgName)); } continue; } @@ -1089,10 +1101,10 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( // Resolve the symbol for all range use so that we have a uniform way of // capturing the values. range = symbolInfoMap.getValueAndRangeUse(range); - os << formatv("for (auto v: {0}) {{\n tblgen_values.push_back(v);\n}\n", - range); + os.indent(6) << formatv( + "for (auto v : {0}) tblgen_values.push_back(v);\n", range); } else { - os << formatv("tblgen_values.push_back(", varName); + os.indent(6) << formatv("tblgen_values.push_back(", varName); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse( childNodeNames.lookup(argIndex)); diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt deleted file mode 100644 index 42a1c21261c4d..0000000000000 --- a/mlir/unittests/Support/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_mlir_unittest(MLIRSupportTests - IndentedOstreamTest.cpp -) - -target_link_libraries(MLIRSupportTests - PRIVATE MLIRSupportIdentedOstream MLIRSupport) diff --git a/mlir/unittests/Support/IndentedOstreamTest.cpp b/mlir/unittests/Support/IndentedOstreamTest.cpp deleted file mode 100644 index 0271eb73e8897..0000000000000 --- a/mlir/unittests/Support/IndentedOstreamTest.cpp +++ /dev/null @@ -1,110 +0,0 @@ -//===- IndentedOstreamTest.cpp - Indented raw ostream Tests ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Support/IndentedOstream.h" -#include "gmock/gmock.h" - -using namespace mlir; -using ::testing::StrEq; - -TEST(FormatTest, SingleLine) { - std::string str; - llvm::raw_string_ostream os(str); - raw_indented_ostream ros(os); - ros << 10; - ros.flush(); - EXPECT_THAT(os.str(), StrEq("10")); -} - -TEST(FormatTest, SimpleMultiLine) { - std::string str; - llvm::raw_string_ostream os(str); - raw_indented_ostream ros(os); - ros << "a"; - ros << "b"; - ros << "\n"; - ros << "c"; - ros << "\n"; - ros.flush(); - EXPECT_THAT(os.str(), StrEq("ab\nc\n")); -} - -TEST(FormatTest, SimpleMultiLineIndent) { - std::string str; - llvm::raw_string_ostream os(str); - raw_indented_ostream ros(os); - ros.indent(2) << "a"; - ros.indent(4) << "b"; - ros << "\n"; - ros << "c"; - ros << "\n"; - ros.flush(); - EXPECT_THAT(os.str(), StrEq(" a b\n c\n")); -} - -TEST(FormatTest, SingleRegion) { - std::string str; - llvm::raw_string_ostream os(str); - raw_indented_ostream ros(os); - ros << "before\n"; - { - raw_indented_ostream::DelimitedScope scope(ros); - ros << "inside " << 10; - ros << "\n two\n"; - { - raw_indented_ostream::DelimitedScope scope(ros, "{\n", "\n}\n"); - ros << "inner inner"; - } - } - ros << "after"; - ros.flush(); - const auto *expected = - R"(before - inside 10 - two - { - inner inner - } -after)"; - EXPECT_THAT(os.str(), StrEq(expected)); - - // Repeat the above with inline form. - str.clear(); - ros << "before\n"; - ros.scope().os << "inside " << 10 << "\n two\n"; - ros.scope().os.scope("{\n", "\n}\n").os << "inner inner"; - ros << "after"; - ros.flush(); - EXPECT_THAT(os.str(), StrEq(expected)); -} - -TEST(FormatTest, Reindent) { - std::string str; - llvm::raw_string_ostream os(str); - raw_indented_ostream ros(os); - - // String to print with some additional empty lines at the start and lines - // with just spaces. - const auto *desc = R"( - - - First line - second line - - - )"; - ros.reindent(desc); - ros.flush(); - const auto *expected = - R"(First line - second line - - -)"; - EXPECT_THAT(os.str(), StrEq(expected)); -} From dca4b7130de547860925631295acfce33130a100 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 3 Oct 2020 17:51:40 +0100 Subject: [PATCH 465/544] [Analysis] resolveAllCalls - fix use after std::move warning. NFCI. We can't use Use.Calls after its std::move()'d to TmpCalls as it will be in an undefined state. Instead, swap with the known empty map in TmpCalls so we can then safely emplace_back into the now empty Use.Calls. Fixes clang static analyzer warning. --- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 9947ddaf00714..8c9bce4ba67c5 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -692,7 +692,10 @@ const ConstantRange *findParamAccess(const FunctionSummary &FS, void resolveAllCalls(UseInfo &Use, const ModuleSummaryIndex *Index) { ConstantRange FullSet(Use.Range.getBitWidth(), true); - UseInfo::CallsTy TmpCalls = std::move(Use.Calls); + // Move Use.Calls to a temp storage and repopulate - don't use std::move as it + // leaves Use.Calls in an undefined state. + UseInfo::CallsTy TmpCalls; + std::swap(TmpCalls, Use.Calls); for (const auto &C : TmpCalls) { const Function *F = findCalleeInModule(C.first.Callee); if (F) { From 53fc426088d7e48272bfc37a3881a7a6fe405940 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 3 Oct 2020 18:32:47 +0100 Subject: [PATCH 466/544] [InstCombine] Add tests for or(shl(x,c1),lshr(y,c2)) patterns that could fold to funnel shifts Some initial test coverage toward fixing PR46896 - these are just copied from rotate.ll --- llvm/test/Transforms/InstCombine/funnel.ll | 184 +++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/funnel.ll diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll new file mode 100644 index 0000000000000..9adb91b88d7ff --- /dev/null +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +; TODO: Canonicalize or(shl,lshr) by constant to funnel shift intrinsics. +; This should help cost modeling for vectorization, inlining, etc. +; If a target does not have a fshl instruction, the expansion will +; be exactly these same 3 basic ops (shl/lshr/or). + +define i32 @fshl_i32_constant(i32 %x, i32 %y) { +; CHECK-LABEL: @fshl_i32_constant( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 21 +; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]] +; CHECK-NEXT: ret i32 [[R]] +; + %shl = shl i32 %x, 11 + %shr = lshr i32 %y, 21 + %r = or i32 %shr, %shl + ret i32 %r +} + +define i42 @fshr_i42_constant(i42 %x, i42 %y) { +; CHECK-LABEL: @fshr_i42_constant( +; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[X:%.*]], 31 +; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[Y:%.*]], 11 +; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]] +; CHECK-NEXT: ret i42 [[R]] +; + %shl = shl i42 %x, 31 + %shr = lshr i42 %y, 11 + %r = or i42 %shr, %shl + ret i42 %r +} + +; TODO: Vector types are allowed. + +define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { +; CHECK-LABEL: @fshl_v2i16_constant_splat( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %shl = shl <2 x i16> %x, + %shr = lshr <2 x i16> %y, + %r = or <2 x i16> %shl, %shr + ret <2 x i16> %r +} + +define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) { +; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %shl = shl <2 x i16> %x, + %shr = lshr <2 x i16> %y, + %r = or <2 x i16> %shl, %shr + ret <2 x i16> %r +} + +define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { +; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %shl = shl <2 x i16> %x, + %shr = lshr <2 x i16> %y, + %r = or <2 x i16> %shl, %shr + ret <2 x i16> %r +} + +; TODO: Non-power-of-2 vector types are allowed. + +define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { +; CHECK-LABEL: @fshr_v2i17_constant_splat( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] +; CHECK-NEXT: ret <2 x i17> [[R]] +; + %shl = shl <2 x i17> %x, + %shr = lshr <2 x i17> %y, + %r = or <2 x i17> %shr, %shl + ret <2 x i17> %r +} + +define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) { +; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] +; CHECK-NEXT: ret <2 x i17> [[R]] +; + %shl = shl <2 x i17> %x, + %shr = lshr <2 x i17> %y, + %r = or <2 x i17> %shr, %shl + ret <2 x i17> %r +} + +define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) { +; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] +; CHECK-NEXT: ret <2 x i17> [[R]] +; + %shl = shl <2 x i17> %x, + %shr = lshr <2 x i17> %y, + %r = or <2 x i17> %shr, %shl + ret <2 x i17> %r +} + +; TODO: Allow arbitrary shift constants. + +define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @fshr_v2i32_constant_nonsplat( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %shl = shl <2 x i32> %x, + %shr = lshr <2 x i32> %y, + %r = or <2 x i32> %shl, %shr + ret <2 x i32> %r +} + +define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef0( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %shl = shl <2 x i32> %x, + %shr = lshr <2 x i32> %y, + %r = or <2 x i32> %shl, %shr + ret <2 x i32> %r +} + +define <2 x i32> @fshr_v2i32_constant_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef1( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %shl = shl <2 x i32> %x, + %shr = lshr <2 x i32> %y, + %r = or <2 x i32> %shl, %shr + ret <2 x i32> %r +} + +define <2 x i36> @fshl_v2i36_constant_nonsplat(<2 x i36> %x, <2 x i36> %y) { +; CHECK-LABEL: @fshl_v2i36_constant_nonsplat( +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i36> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i36> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i36> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <2 x i36> [[R]] +; + %shl = shl <2 x i36> %x, + %shr = lshr <2 x i36> %y, + %r = or <2 x i36> %shl, %shr + ret <2 x i36> %r +} + +define <3 x i36> @fshl_v3i36_constant_nonsplat_undef0(<3 x i36> %x, <3 x i36> %y) { +; CHECK-LABEL: @fshl_v3i36_constant_nonsplat_undef0( +; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i36> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i36> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <3 x i36> [[SHL]], [[SHR]] +; CHECK-NEXT: ret <3 x i36> [[R]] +; + %shl = shl <3 x i36> %x, + %shr = lshr <3 x i36> %y, + %r = or <3 x i36> %shl, %shr + ret <3 x i36> %r +} From 66e493f81e8e27b4a498a6dac54d404c2333fa5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 3 Oct 2020 16:38:09 +0200 Subject: [PATCH 467/544] [asan] Stop instrumenting user-defined ELF sections Do not instrument user-defined ELF sections (whose names resemble valid C identifiers). They may have special use semantics and modifying them may break programs. This is e.g. the case with NetBSD __link_set API that expects these sections to store consecutive array elements. Differential Revision: https://reviews.llvm.org/D76665 --- .../Instrumentation/AddressSanitizer.cpp | 8 ++++++++ .../do-not-instrument-netbsd-link_set.ll | 14 ++++++++++++++ .../instrument-section-invalid-c-ident.ll | 17 +++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-netbsd-link_set.ll create mode 100644 llvm/test/Instrumentation/AddressSanitizer/instrument-section-invalid-c-ident.ll diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index fd5eaada2febc..5902a2eb8374c 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1883,6 +1883,14 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { return false; } + // Do not instrument user-defined sections (with names resembling + // valid C identifiers) + if (TargetTriple.isOSBinFormatELF()) { + if (std::all_of(Section.begin(), Section.end(), + [](char c) { return llvm::isAlnum(c) || c == '_'; })) + return false; + } + // On COFF, if the section name contains '$', it is highly likely that the // user is using section sorting to create an array of globals similar to // the way initialization callbacks are registered in .init_array and diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-netbsd-link_set.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-netbsd-link_set.ll new file mode 100644 index 0000000000000..fd37d1357e87d --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-netbsd-link_set.ll @@ -0,0 +1,14 @@ +; This test checks that NetBSD link_set array elements remain consecutive. +; RUN: opt < %s -asan -asan-module -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-netbsd" + +module asm ".hidden __stop_link_set_test_set" + +@data1 = dso_local global i32 1, align 4 +@data2 = dso_local global i32 2, align 4 +@__link_set_test_set_sym_data1 = internal constant i8* bitcast (i32* @data1 to i8*), section "link_set_test_set", align 8 +@__link_set_test_set_sym_data2 = internal constant i8* bitcast (i32* @data2 to i8*), section "link_set_test_set", align 8 +; CHECK: @__link_set_test_set_sym_data1 = internal constant i8*{{.*}}, section "link_set_test_set" +; CHECK-NEXT: @__link_set_test_set_sym_data2 = internal constant i8*{{.*}}, section "link_set_test_set" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-section-invalid-c-ident.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-section-invalid-c-ident.ll new file mode 100644 index 0000000000000..a320f797a7137 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-section-invalid-c-ident.ll @@ -0,0 +1,17 @@ +; This test checks that sections with names not resembling valid C identifiers +; are instrumented. +; RUN: opt < %s -asan -asan-module -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-netbsd" + +module asm ".hidden invalid$c$name" + +@data1 = dso_local global i32 1, align 4 +@data2 = dso_local global i32 2, align 4 +@__invalid$c$name_sym_data1 = internal constant i8* bitcast (i32* @data1 to i8*), section "invalid$c$name", align 8 +@__invalid$c$name_sym_data2 = internal constant i8* bitcast (i32* @data2 to i8*), section "invalid$c$name", align 8 +; CHECK: @"__invalid$c$name_sym_data1" = internal constant{{.*}}, section "invalid$c$name", comdat +; CHECK-NEXT: @"__invalid$c$name_sym_data2" = internal constant{{.*}}, section "invalid$c$name", comdat +; CHECK: @"__asan_global___invalid$c$name_sym_data1" +; CHECK-NEXT: @"__asan_global___invalid$c$name_sym_data2" From 80b108f404fc9e88889df7247f6ae9697083cbda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Thu, 1 Oct 2020 19:31:59 +0200 Subject: [PATCH 468/544] [lldb] [Process/NetBSD] Fix reading FIP/FDP registers Fix reading FIP/FDP registers to correctly return segment and offset parts. On amd64, this roughly matches the Linux behavior of splitting the 64-bit FIP/FDP into two halves, and putting the higher 32 bits into f*seg and lower into f*off. Well, actually we use only 16 bits of higher half but the CPUs do not seem to handle more than that anyway. Differential Revision: https://reviews.llvm.org/D88681 --- .../Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp b/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp index af8b2a2ba794f..089ae4ffff2fe 100644 --- a/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp +++ b/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp @@ -657,13 +657,13 @@ NativeRegisterContextNetBSD_x86_64::ReadRegister(const RegisterInfo *reg_info, reg_value = (uint64_t)m_fpr.fxstate.fx_opcode; break; case lldb_fiseg_x86_64: - reg_value = (uint64_t)m_fpr.fxstate.fx_ip.fa_64; + reg_value = (uint32_t)m_fpr.fxstate.fx_ip.fa_32.fa_seg; break; case lldb_fioff_x86_64: reg_value = (uint32_t)m_fpr.fxstate.fx_ip.fa_32.fa_off; break; case lldb_foseg_x86_64: - reg_value = (uint64_t)m_fpr.fxstate.fx_dp.fa_64; + reg_value = (uint32_t)m_fpr.fxstate.fx_dp.fa_32.fa_seg; break; case lldb_fooff_x86_64: reg_value = (uint32_t)m_fpr.fxstate.fx_dp.fa_32.fa_off; @@ -945,13 +945,13 @@ Status NativeRegisterContextNetBSD_x86_64::WriteRegister( m_fpr.fxstate.fx_opcode = reg_value.GetAsUInt16(); break; case lldb_fiseg_x86_64: - m_fpr.fxstate.fx_ip.fa_64 = reg_value.GetAsUInt64(); + m_fpr.fxstate.fx_ip.fa_32.fa_seg = reg_value.GetAsUInt32(); break; case lldb_fioff_x86_64: m_fpr.fxstate.fx_ip.fa_32.fa_off = reg_value.GetAsUInt32(); break; case lldb_foseg_x86_64: - m_fpr.fxstate.fx_dp.fa_64 = reg_value.GetAsUInt64(); + m_fpr.fxstate.fx_dp.fa_32.fa_seg = reg_value.GetAsUInt32(); break; case lldb_fooff_x86_64: m_fpr.fxstate.fx_dp.fa_32.fa_off = reg_value.GetAsUInt32(); From 9821632056dce9e2150bab9c0fbd9b2c7da64258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Thu, 1 Oct 2020 19:38:49 +0200 Subject: [PATCH 469/544] [lldb] [Process/NetBSD] Fix crash on unsupported i386 regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple fixes related to bugs discovered while debugging a crash when reading all registers on i386. The underlying problem was that GetSetForNativeRegNum() did not account for MPX registers on i386, and since it only compared against upper bounds of each known register set, the MPX registers were classified into the wrong set and therefore considered supported. However, they were not expected in RegNumX86ToX86_64() and caused the assertion to fail. This includes: - adding (unused) i386 → x86_64 translations for MPX registers - fixing GetSetForNativeRegNum() to check both lower and upper bound for register sets, to avoid wrongly classifying unhandled register sets - adding missing range check for MPX registers on i386 - renaming k_last_mpxr to k_last_mpxr_i386 for consistency - replacing return-assertions with llvm_unreachable() and adding more checks for unexpected parameters Differential Revision: https://reviews.llvm.org/D88682 --- .../NativeRegisterContextNetBSD_x86_64.cpp | 67 +++++++++++-------- .../Process/Utility/lldb-x86-register-enums.h | 2 +- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp b/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp index 089ae4ffff2fe..cb1e883258b55 100644 --- a/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp +++ b/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp @@ -278,11 +278,8 @@ NativeRegisterContextNetBSD_x86_64::GetRegisterSet(uint32_t set_index) const { case llvm::Triple::x86_64: return &g_reg_sets_x86_64[set_index]; default: - assert(false && "Unhandled target architecture."); - return nullptr; + llvm_unreachable("Unhandled target architecture."); } - - return nullptr; } static constexpr int RegNumX86ToX86_64(int regnum) { @@ -375,6 +372,15 @@ static constexpr int RegNumX86ToX86_64(int regnum) { case lldb_ymm6_i386: case lldb_ymm7_i386: return lldb_ymm0_x86_64 + regnum - lldb_ymm0_i386; + case lldb_bnd0_i386: + case lldb_bnd1_i386: + case lldb_bnd2_i386: + case lldb_bnd3_i386: + return lldb_bnd0_x86_64 + regnum - lldb_bnd0_i386; + case lldb_bndcfgu_i386: + return lldb_bndcfgu_x86_64; + case lldb_bndstatus_i386: + return lldb_bndstatus_x86_64; case lldb_dr0_i386: case lldb_dr1_i386: case lldb_dr2_i386: @@ -385,8 +391,7 @@ static constexpr int RegNumX86ToX86_64(int regnum) { case lldb_dr7_i386: return lldb_dr0_x86_64 + regnum - lldb_dr0_i386; default: - assert(false && "Unhandled i386 register."); - return 0; + llvm_unreachable("Unhandled i386 register."); } } @@ -394,35 +399,38 @@ int NativeRegisterContextNetBSD_x86_64::GetSetForNativeRegNum( int reg_num) const { switch (GetRegisterInfoInterface().GetTargetArchitecture().GetMachine()) { case llvm::Triple::x86: - if (reg_num <= k_last_gpr_i386) + if (reg_num >= k_first_gpr_i386 && reg_num <= k_last_gpr_i386) return GPRegSet; - else if (reg_num <= k_last_fpr_i386) + if (reg_num >= k_first_fpr_i386 && reg_num <= k_last_fpr_i386) return FPRegSet; - else if (reg_num <= k_last_avx_i386) + if (reg_num >= k_first_avx_i386 && reg_num <= k_last_avx_i386) return XStateRegSet; // AVX - else if (reg_num <= lldb_dr7_i386) + if (reg_num >= k_first_mpxr_i386 && reg_num <= k_last_mpxr_i386) + return -1; // MPXR + if (reg_num >= k_first_mpxc_i386 && reg_num <= k_last_mpxc_i386) + return -1; // MPXC + if (reg_num >= k_first_dbr_i386 && reg_num <= k_last_dbr_i386) return DBRegSet; // DBR - else - return -1; + break; case llvm::Triple::x86_64: - if (reg_num <= k_last_gpr_x86_64) + if (reg_num >= k_first_gpr_x86_64 && reg_num <= k_last_gpr_x86_64) return GPRegSet; - else if (reg_num <= k_last_fpr_x86_64) + if (reg_num >= k_first_fpr_x86_64 && reg_num <= k_last_fpr_x86_64) return FPRegSet; - else if (reg_num <= k_last_avx_x86_64) + if (reg_num >= k_first_avx_x86_64 && reg_num <= k_last_avx_x86_64) return XStateRegSet; // AVX - else if (reg_num <= k_last_mpxr_x86_64) + if (reg_num >= k_first_mpxr_x86_64 && reg_num <= k_last_mpxr_x86_64) return -1; // MPXR - else if (reg_num <= k_last_mpxc_x86_64) + if (reg_num >= k_first_mpxc_x86_64 && reg_num <= k_last_mpxc_x86_64) return -1; // MPXC - else if (reg_num <= lldb_dr7_x86_64) + if (reg_num >= k_first_dbr_x86_64 && reg_num <= k_last_dbr_x86_64) return DBRegSet; // DBR - else - return -1; + break; default: - assert(false && "Unhandled target architecture."); - return -1; + llvm_unreachable("Unhandled target architecture."); } + + llvm_unreachable("Register does not belong to any register set"); } Status NativeRegisterContextNetBSD_x86_64::ReadRegisterSet(uint32_t set) { @@ -511,9 +519,7 @@ NativeRegisterContextNetBSD_x86_64::ReadRegister(const RegisterInfo *reg_info, reg = RegNumX86ToX86_64(reg); break; default: - assert(false && "Unhandled target architecture."); - error.SetErrorString("Unhandled target architecture."); - return error; + llvm_unreachable("Unhandled target architecture."); } error = ReadRegisterSet(set); @@ -758,6 +764,8 @@ NativeRegisterContextNetBSD_x86_64::ReadRegister(const RegisterInfo *reg_info, case lldb_dr7_x86_64: reg_value = (uint64_t)m_dbr.dr[reg - lldb_dr0_x86_64]; break; + default: + llvm_unreachable("Reading unknown/unsupported register"); } return error; @@ -799,9 +807,7 @@ Status NativeRegisterContextNetBSD_x86_64::WriteRegister( reg = RegNumX86ToX86_64(reg); break; default: - assert(false && "Unhandled target architecture."); - error.SetErrorString("Unhandled target architecture."); - return error; + llvm_unreachable("Unhandled target architecture."); } error = ReadRegisterSet(set); @@ -1034,6 +1040,7 @@ Status NativeRegisterContextNetBSD_x86_64::WriteRegister( } #else error.SetErrorString("XState not supported by the kernel"); + return error; #endif break; case lldb_dr0_x86_64: @@ -1046,6 +1053,8 @@ Status NativeRegisterContextNetBSD_x86_64::WriteRegister( case lldb_dr7_x86_64: m_dbr.dr[reg - lldb_dr0_x86_64] = reg_value.GetAsUInt64(); break; + default: + llvm_unreachable("Reading unknown/unsupported register"); } return WriteRegisterSet(set); @@ -1112,7 +1121,7 @@ int NativeRegisterContextNetBSD_x86_64::GetDR(int num) const { case llvm::Triple::x86_64: return lldb_dr0_x86_64 + num; default: - return -1; + llvm_unreachable("Unhandled target architecture."); } } diff --git a/lldb/source/Plugins/Process/Utility/lldb-x86-register-enums.h b/lldb/source/Plugins/Process/Utility/lldb-x86-register-enums.h index 35f1a4075d09b..3bee9393719d5 100644 --- a/lldb/source/Plugins/Process/Utility/lldb-x86-register-enums.h +++ b/lldb/source/Plugins/Process/Utility/lldb-x86-register-enums.h @@ -106,7 +106,7 @@ enum { lldb_bnd1_i386, lldb_bnd2_i386, lldb_bnd3_i386, - k_last_mpxr = lldb_bnd3_i386, + k_last_mpxr_i386 = lldb_bnd3_i386, k_first_mpxc_i386, lldb_bndcfgu_i386 = k_first_mpxc_i386, From 381bdc75ee2ca2fb9784ffb2f6b90accd8eab3b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 30 Sep 2020 15:33:01 +0200 Subject: [PATCH 470/544] [lldb] [test/Register] Add read/write tests for x87 regs Add a partial read/write tests for x87 FPU registers. This includes reading and writing ST registers, control registers and floating-point exception data registers (fop, fip, fdp). The tests assume the current (roughly incorrect) behavior of reporting the 'abridged' 8-bit ftag state as 16-bit ftag. They also assume Linux plugin behavior of reporting fip/fdp split into halves as (fiseg, fioff) and (foseg, fooff). Differential Revision: https://reviews.llvm.org/D88583 --- .../Shell/Register/Inputs/x86-fp-read.cpp | 45 ++++++++++++ .../Shell/Register/Inputs/x86-fp-write.cpp | 69 +++++++++++++++++++ lldb/test/Shell/Register/x86-64-fp-write.test | 48 +++++++++++++ lldb/test/Shell/Register/x86-fp-read.test | 36 ++++++++++ lldb/test/Shell/Register/x86-fp-write.test | 45 ++++++++++++ 5 files changed, 243 insertions(+) create mode 100644 lldb/test/Shell/Register/Inputs/x86-fp-read.cpp create mode 100644 lldb/test/Shell/Register/Inputs/x86-fp-write.cpp create mode 100644 lldb/test/Shell/Register/x86-64-fp-write.test create mode 100644 lldb/test/Shell/Register/x86-fp-read.test create mode 100644 lldb/test/Shell/Register/x86-fp-write.test diff --git a/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp b/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp new file mode 100644 index 0000000000000..1bd2d60affa2e --- /dev/null +++ b/lldb/test/Shell/Register/Inputs/x86-fp-read.cpp @@ -0,0 +1,45 @@ +#include + +struct alignas(16) float80_raw { + uint64_t mantissa; + uint16_t sign_exp; +}; + +int main() { + float80_raw st[] = { + {0x8000000000000000, 0x4000}, // +2.0 + {0x3f00000000000000, 0x0000}, // 1.654785e-4932 (denormal) + {0x0000000000000000, 0x0000}, // +0 + {0x0000000000000000, 0x8000}, // -0 + {0x8000000000000000, 0x7fff}, // +inf + {0x8000000000000000, 0xffff}, // -inf + {0xc000000000000000, 0xffff}, // nan + // leave st7 empty to test tag word better + }; + + // unmask divide-by-zero exception + uint16_t cw = 0x037b; + // used as single-precision float + uint32_t zero = 0; + + asm volatile( + "finit\n\t" + "fldcw %1\n\t" + // load on stack in reverse order to make the result easier to read + "fldt 0x60(%0)\n\t" + "fldt 0x50(%0)\n\t" + "fldt 0x40(%0)\n\t" + "fldt 0x30(%0)\n\t" + "fldt 0x20(%0)\n\t" + "fldt 0x10(%0)\n\t" + "fldt 0x00(%0)\n\t" + // this should trigger a divide-by-zero + "fdivs (%2)\n\t" + "int3\n\t" + : + : "a"(st), "m"(cw), "b"(&zero) + : "st" + ); + + return 0; +} diff --git a/lldb/test/Shell/Register/Inputs/x86-fp-write.cpp b/lldb/test/Shell/Register/Inputs/x86-fp-write.cpp new file mode 100644 index 0000000000000..63791a8eff2e7 --- /dev/null +++ b/lldb/test/Shell/Register/Inputs/x86-fp-write.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +struct alignas(16) float80_raw { + uint8_t data[10]; +}; + +int main() { + float80_raw st[8]; + uint16_t env[14]; + union alignas(16) { + uint16_t i16[256]; + uint32_t i32[128]; + uint64_t i64[64]; + } fxsave; + + asm volatile( + "finit\n\t" + "int3\n\t" +#if defined(__x86_64__) + "fxsave64 %2\n\t" +#else + "fxsave %2\n\t" +#endif + "fnstenv %1\n\t" + "fnclex\n\t" + "fstpt 0x00(%0)\n\t" + "fstpt 0x10(%0)\n\t" + "fstpt 0x20(%0)\n\t" + "fstpt 0x30(%0)\n\t" + "fstpt 0x40(%0)\n\t" + "fstpt 0x50(%0)\n\t" + "fstpt 0x60(%0)\n\t" + "fstpt 0x70(%0)\n\t" + : + : "a"(st), "m"(env), "m"(fxsave) + : "st" + ); + + assert(env[0] == fxsave.i16[0]); + assert(env[2] == fxsave.i16[1]); + + printf("fctrl = 0x%04" PRIx16 "\n", env[0]); + printf("fstat = 0x%04" PRIx16 "\n", env[2]); + printf("ftag = 0x%04" PRIx16 "\n", env[4]); + printf("fop = 0x%04" PRIx16 "\n", fxsave.i16[3]); +#if defined(__x86_64__) + printf("fip = 0x%016" PRIx64 "\n", fxsave.i64[1]); + printf("fdp = 0x%016" PRIx64 "\n", fxsave.i64[2]); +#else + printf("fip = 0x%08" PRIx32 "\n", fxsave.i32[2]); + printf("fcs = 0x%04" PRIx16 "\n", fxsave.i16[6]); + printf("fdp = 0x%08" PRIx32 "\n", fxsave.i32[4]); + printf("fds = 0x%04" PRIx16 "\n", fxsave.i16[10]); +#endif + printf("mxcsr = 0x%08" PRIx32 "\n", fxsave.i32[6]); + printf("mxcsr_mask = 0x%08" PRIx32 "\n", fxsave.i32[7]); + + for (int i = 0; i < 8; ++i) { + printf("st%d = { ", i); + for (int j = 0; j < sizeof(st->data); ++j) + printf("0x%02" PRIx8 " ", st[i].data[j]); + printf("}\n"); + } + + return 0; +} diff --git a/lldb/test/Shell/Register/x86-64-fp-write.test b/lldb/test/Shell/Register/x86-64-fp-write.test new file mode 100644 index 0000000000000..b2e8c271b51bb --- /dev/null +++ b/lldb/test/Shell/Register/x86-64-fp-write.test @@ -0,0 +1,48 @@ +# REQUIRES: native && target-x86_64 +# RUN: %clangxx_host %p/Inputs/x86-fp-write.cpp -o %t +# RUN: %lldb -b -s %s %t | FileCheck %s +process launch + +register write fctrl 0x037b +register write fstat 0x8884 +# note: this needs to enable all registers for writes to be effective +# TODO: fix it to use proper ftag values instead of 'abridged' +register write ftag 0x00ff +register write fop 0x0033 +# the exact addresses do not matter, we want just to verify FXSAVE +# note: fxrstor64 apparently truncates this to 48 bits, and sign extends +# the highest bits, so let's keep the value safely below +register write fiseg 0x00000567 +register write fioff 0x89abcdef +register write foseg 0x00000a98 +register write fooff 0x76543210 + +register write st0 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40}" +register write st1 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00}" +register write st2 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" +register write st3 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80}" +register write st4 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0x7f}" +register write st5 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff}" +register write st6 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff}" +register write st7 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + +process continue + +# CHECK: process continue +# CHECK-DAG: fctrl = 0x037b +# CHECK-DAG: fstat = 0x8884 +# CHECK-DAG: ftag = 0xa961 +# CHECK-DAG: fop = 0x0033 +# CHECK-DAG: fip = 0x0000056789abcdef +# CHECK-DAG: fdp = 0x00000a9876543210 + +# CHECK-DAG: st0 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40 } +# CHECK-DAG: st1 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00 } +# CHECK-DAG: st2 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 } +# CHECK-DAG: st3 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 } +# CHECK-DAG: st4 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0x7f } +# CHECK-DAG: st5 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff } +# CHECK-DAG: st6 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff } +# CHECK-DAG: st7 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 } + +# CHECK: Process {{[0-9]+}} exited with status = 0 diff --git a/lldb/test/Shell/Register/x86-fp-read.test b/lldb/test/Shell/Register/x86-fp-read.test new file mode 100644 index 0000000000000..9ecc5634e7293 --- /dev/null +++ b/lldb/test/Shell/Register/x86-fp-read.test @@ -0,0 +1,36 @@ +# REQUIRES: native && (target-x86 || target-x86_64) +# RUN: %clangxx_host -g %p/Inputs/x86-fp-read.cpp -o %t +# RUN: %lldb -b -s %s %t | FileCheck %s +process launch +# CHECK: Process {{.*}} stopped + +register read --all +# CHECK-DAG: fctrl = 0x037b +# CHECK-DAG: fstat = 0x8884 +# TODO: the following value is incorrect, it's a bug in the way +# FXSAVE/XSAVE is interpreted; it should be 0xa963 once fixed +# CHECK-DAG: ftag = 0x00fe +# CHECK-DAG: fop = 0x0033 + +# CHECK-DAG: st0 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40} +# CHECK-DAG: st1 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00} +# CHECK-DAG: st2 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00} +# CHECK-DAG: st3 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80} +# CHECK-DAG: st4 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0x7f} +# CHECK-DAG: st5 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff} +# CHECK-DAG: st6 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff} +# CHECK-DAG: st7 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00} + +# fdiv (%rbx) gets encoded into 2 bytes, int3 into 1 byte +print (void*)($pc-3) +# CHECK: (void *) $0 = [[FDIV:0x[0-9a-f]*]] +# TODO: we probably should not split it like this +print (void*)($fiseg*0x100000000 + $fioff) +# CHECK: (void *) $1 = [[FDIV]] +print &zero +# CHECK: (uint32_t *) $2 = [[ZERO:0x[0-9a-f]*]] +print (uint32_t*)($foseg * 0x100000000 + $fooff) +# CHECK: (uint32_t *) $3 = [[ZERO]] + +process continue +# CHECK: Process {{[0-9]+}} exited with status = 0 diff --git a/lldb/test/Shell/Register/x86-fp-write.test b/lldb/test/Shell/Register/x86-fp-write.test new file mode 100644 index 0000000000000..81f542c419afe --- /dev/null +++ b/lldb/test/Shell/Register/x86-fp-write.test @@ -0,0 +1,45 @@ +# REQUIRES: native && target-x86 +# RUN: %clangxx_host %p/Inputs/x86-fp-write.cpp -o %t +# RUN: %lldb -b -s %s %t | FileCheck %s +process launch + +register write fctrl 0x037b +register write fstat 0x8884 +# note: this needs to enable all registers for writes to be effective +# TODO: fix it to use proper ftag values instead of 'abridged' +register write ftag 0x00ff +register write fop 0x0033 +# the exact addresses do not matter, we want just to verify FXSAVE +# note: segment registers are not supported on all CPUs +register write fioff 0x89abcdef +register write fooff 0x76543210 + +register write st0 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40}" +register write st1 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00}" +register write st2 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" +register write st3 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80}" +register write st4 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0x7f}" +register write st5 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff}" +register write st6 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff}" +register write st7 "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + +process continue + +# CHECK: process continue +# CHECK-DAG: fctrl = 0x037b +# CHECK-DAG: fstat = 0x8884 +# CHECK-DAG: ftag = 0xa961 +# CHECK-DAG: fop = 0x0033 +# CHECK-DAG: fip = 0x89abcdef +# CHECK-DAG: fdp = 0x76543210 + +# CHECK-DAG: st0 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0x00 0x40 } +# CHECK-DAG: st1 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x3f 0x00 0x00 } +# CHECK-DAG: st2 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 } +# CHECK-DAG: st3 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 } +# CHECK-DAG: st4 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0x7f } +# CHECK-DAG: st5 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x80 0xff 0xff } +# CHECK-DAG: st6 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xc0 0xff 0xff } +# CHECK-DAG: st7 = { 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 } + +# CHECK: Process {{[0-9]+}} exited with status = 0 From fcb0ab59335be185e05258c905ef57da9e7f3324 Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Sat, 3 Oct 2020 13:38:00 -0400 Subject: [PATCH 471/544] [clang][NFC] Change a mention of `objc_static_protocol` to `non_runtime` --- clang/include/clang/AST/DeclObjC.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h index f2c25bceed185..88cedbd91b6d4 100644 --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -2178,8 +2178,8 @@ class ObjCProtocolDecl : public ObjCContainerDecl, data().ReferencedProtocols.set(List, Num, Locs, C); } - /// This is true iff the protocol is tagged with the `objc_static_protocol` - /// attribute. + /// This is true iff the protocol is tagged with the + /// `objc_non_runtime_protocol` attribute. bool isNonRuntimeProtocol() const; /// Get the set of all protocols implied by this protocols inheritance From ba60dc0aa75e86165e260b2c08afafd1c394e95a Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sat, 3 Oct 2020 14:22:53 -0400 Subject: [PATCH 472/544] Revert "[Driver] Move detectLibcxxIncludePath to ToolChain" This reverts commit e25bf2592060e7751f8b14522c97081ce2047175. Breaks tests on Windows, see comments on https://reviews.llvm.org/D88452 --- clang/include/clang/Driver/ToolChain.h | 3 --- clang/lib/Driver/ToolChain.cpp | 23 ------------------- clang/lib/Driver/ToolChains/Fuchsia.cpp | 4 ++-- clang/lib/Driver/ToolChains/Gnu.cpp | 22 +++++++++++++++++- .../basic_fuchsia_tree/include/c++/v1/.keep | 0 5 files changed, 23 insertions(+), 29 deletions(-) delete mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/include/c++/v1/.keep diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index db4c4a7302325..7495e08fe6e64 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -575,9 +575,6 @@ class ToolChain { // given compilation arguments. virtual UnwindLibType GetUnwindLibType(const llvm::opt::ArgList &Args) const; - // Detect the highest available version of libc++ in base path. - virtual std::string detectLibcxxIncludePath(StringRef Base) const; - /// AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set /// the include paths to use for the given C++ standard library type. virtual void diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8e98e32068808..8991216da6765 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -924,29 +924,6 @@ void ToolChain::addExternCSystemIncludeIfExists(const ArgList &DriverArgs, } } -std::string ToolChain::detectLibcxxIncludePath(StringRef Base) const { - std::error_code EC; - int MaxVersion = 0; - std::string MaxVersionString; - for (llvm::vfs::directory_iterator LI = getVFS().dir_begin(Base, EC), LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef VersionText = llvm::sys::path::filename(LI->path()); - int Version; - if (VersionText[0] == 'v' && - !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { - if (Version > MaxVersion) { - MaxVersion = Version; - MaxVersionString = std::string(VersionText); - } - } - } - if (!MaxVersion) - return ""; - SmallString<128> P(Base); - llvm::sys::path::append(P, MaxVersionString); - return std::string(P.str()); -} - void ToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { // Header search paths should be handled by each of the subclasses. diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index e5f23ee385559..781179be39a36 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -319,8 +319,8 @@ void Fuchsia::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: { SmallString<128> P(getDriver().Dir); - llvm::sys::path::append(P, "..", "include", "c++"); - addSystemInclude(DriverArgs, CC1Args, detectLibcxxIncludePath(P.str())); + llvm::sys::path::append(P, "..", "include", "c++", "v1"); + addSystemInclude(DriverArgs, CC1Args, P.str()); break; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 3778b6f297ed2..f3843685a522b 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2877,11 +2877,31 @@ void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, } } +static std::string DetectLibcxxIncludePath(llvm::vfs::FileSystem &vfs, + StringRef base) { + std::error_code EC; + int MaxVersion = 0; + std::string MaxVersionString; + for (llvm::vfs::directory_iterator LI = vfs.dir_begin(base, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef VersionText = llvm::sys::path::filename(LI->path()); + int Version; + if (VersionText[0] == 'v' && + !VersionText.slice(1, StringRef::npos).getAsInteger(10, Version)) { + if (Version > MaxVersion) { + MaxVersion = Version; + MaxVersionString = std::string(VersionText); + } + } + } + return MaxVersion ? (base + "/" + MaxVersionString).str() : ""; +} + void Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { auto AddIncludePath = [&](std::string Path) { - std::string IncludePath = detectLibcxxIncludePath(Path); + std::string IncludePath = DetectLibcxxIncludePath(getVFS(), Path); if (IncludePath.empty() || !getVFS().exists(IncludePath)) return false; addSystemInclude(DriverArgs, CC1Args, IncludePath); diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/include/c++/v1/.keep b/clang/test/Driver/Inputs/basic_fuchsia_tree/include/c++/v1/.keep deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 3780a4e568ac763567cc6987372e04f9e3c68ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 23 Sep 2020 14:14:05 +0300 Subject: [PATCH 473/544] [AArch64] Match the windows canonical callee saved register order On windows, the callee saved registers in a canonical prologue are ordered starting from a lower register number at a lower stack address (with the possible gap for aligning the stack at the top); this is the opposite order that llvm normally produces. To achieve this, reverse the order of the registers in the assignCalleeSavedSpillSlots callback, to get the stack objects laid out by PrologEpilogInserter in the right order, and adjust computeCalleeSaveRegisterPairs to lay them out from the bottom up. This allows generated prologs more often to match the format that allows the unwind info to be written as packed info. Differential Revision: https://reviews.llvm.org/D88677 --- .../Target/AArch64/AArch64FrameLowering.cpp | 82 +++++++++++++---- .../lib/Target/AArch64/AArch64FrameLowering.h | 5 ++ llvm/test/CodeGen/AArch64/seh-finally.ll | 14 +-- llvm/test/CodeGen/AArch64/sponentry.ll | 4 +- llvm/test/CodeGen/AArch64/win64_vararg.ll | 66 +++++++------- .../CodeGen/AArch64/wineh-frame-scavenge.mir | 89 +++++++++++++++++++ llvm/test/CodeGen/AArch64/wineh-frame0.mir | 32 +++---- llvm/test/CodeGen/AArch64/wineh-frame1.mir | 48 +++++----- llvm/test/CodeGen/AArch64/wineh-frame2.mir | 32 +++---- llvm/test/CodeGen/AArch64/wineh-frame3.mir | 16 ++-- llvm/test/CodeGen/AArch64/wineh-frame4.mir | 16 ++-- llvm/test/CodeGen/AArch64/wineh-frame5.mir | 16 ++-- .../AArch64/wineh-try-catch-realign.ll | 24 ++--- llvm/test/CodeGen/AArch64/wineh-try-catch.ll | 38 ++++---- .../test/CodeGen/AArch64/wineh_shrinkwrap.mir | 2 +- 15 files changed, 313 insertions(+), 171 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dde2b06a36f05..e77899fa8d7a1 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2077,14 +2077,22 @@ static void computeCalleeSaveRegisterPairs( (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int ByteOffset = AFI->getCalleeSavedStackSize(); + int StackFillDir = -1; + int RegInc = 1; + unsigned FirstReg = 0; + if (NeedsWinCFI) { + // For WinCFI, fill the stack from the bottom up. + ByteOffset = 0; + StackFillDir = 1; + // As the CSI array is reversed to match PrologEpilogInserter, iterate + // backwards, to pair up registers starting from lower numbered registers. + RegInc = -1; + FirstReg = Count - 1; + } int ScalableByteOffset = AFI->getSVECalleeSavedStackSize(); - // On Linux, we will have either one or zero non-paired register. On Windows - // with CFI, we can have multiple unpaired registers in order to utilize the - // available unwind codes. This flag assures that the alignment fixup is done - // only once, as intened. - bool FixupDone = false; - for (unsigned i = 0; i < Count; ++i) { + // When iterating backwards, the loop condition relies on unsigned wraparound. + for (unsigned i = FirstReg; i < Count; i += RegInc) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); @@ -2102,8 +2110,8 @@ static void computeCalleeSaveRegisterPairs( llvm_unreachable("Unsupported register class."); // Add the next reg to the pair if it is in the same register class. - if (i + 1 < Count) { - unsigned NextReg = CSI[i + 1].getReg(); + if (unsigned(i + RegInc) < Count) { + unsigned NextReg = CSI[i + RegInc].getReg(); switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && @@ -2142,7 +2150,7 @@ static void computeCalleeSaveRegisterPairs( // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || - (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && + (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) && "Out of order callee saved regs!"); assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || @@ -2164,30 +2172,43 @@ static void computeCalleeSaveRegisterPairs( "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); + if (NeedsWinCFI && + RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair + RPI.FrameIdx = CSI[i + RegInc].getFrameIdx(); int Scale = RPI.getScale(); + + int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset; + assert(OffsetPre % Scale == 0); + if (RPI.isScalable()) - ScalableByteOffset -= Scale; + ScalableByteOffset += StackFillDir * Scale; else - ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale; + ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); assert(!(RPI.isScalable() && RPI.isPaired()) && "Paired spill/fill instructions don't exist for SVE vectors"); // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. - if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone && + if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI && !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) { - FixupDone = true; - ByteOffset -= 8; + ByteOffset += 8 * StackFillDir; assert(ByteOffset % 16 == 0); assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); + // A stack frame with a gap looks like this, bottom up: + // d9, d8. x21, gap, x20, x19. + // Set extra alignment on the x21 object (the only unpaired register) + // to create the gap above it. MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); } - int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset; - assert(Offset % Scale == 0); + int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; + assert(OffsetPost % Scale == 0); + // If filling top down (default), we want the offset after incrementing it. + // If fillibg bootom up (WinCFI) we need the original offset. + int Offset = NeedsWinCFI ? OffsetPre : OffsetPost; RPI.Offset = Offset / Scale; assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) || @@ -2204,7 +2225,19 @@ static void computeCalleeSaveRegisterPairs( RegPairs.push_back(RPI); if (RPI.isPaired()) - ++i; + i += RegInc; + } + if (NeedsWinCFI) { + // If we need an alignment gap in the stack, align the topmost stack + // object. A stack frame with a gap looks like this, bottom up: + // x19, d8. d9, gap. + // Set extra alignment on the topmost stack object (the first element in + // CSI, which goes top down), to create the gap above it. + if (AFI->hasCalleeSaveStackFreeSpace()) + MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16)); + // We iterated bottom up over the registers; flip RegPairs back to top + // down order. + std::reverse(RegPairs.begin(), RegPairs.end()); } } @@ -2636,6 +2669,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); } +bool AArch64FrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector &CSI) const { + bool NeedsWinCFI = needsWinCFI(MF); + // To match the canonical windows frame layout, reverse the list of + // callee saved registers to get them laid out by PrologEpilogInserter + // in the right order. (PrologEpilogInserter allocates stack objects top + // down. Windows canonical prologs store higher numbered registers at + // the top, thus have the CSI array start from the highest registers.) + if (NeedsWinCFI) + std::reverse(CSI.begin(), CSI.end()); + // Let the generic code do the rest of the setup. + return false; +} + bool AArch64FrameLowering::enableStackSlotScavenging( const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 1ca8c3e9e2bf6..270353790dcf6 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -67,6 +67,11 @@ class AArch64FrameLowering : public TargetFrameLowering { bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; diff --git a/llvm/test/CodeGen/AArch64/seh-finally.ll b/llvm/test/CodeGen/AArch64/seh-finally.ll index dbc6c4b0804bf..79e85d972b0b6 100644 --- a/llvm/test/CodeGen/AArch64/seh-finally.ll +++ b/llvm/test/CodeGen/AArch64/seh-finally.ll @@ -86,12 +86,12 @@ entry: define void @stack_realign() #0 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: ; CHECK-LABEL: stack_realign -; CHECK: mov x29, sp +; CHECK: add x29, sp, #8 ; CHECK: sub x9, sp, #16 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x29, #32] +; CHECK: stur x0, [x29, #24] ; CHECK: .set .Lstack_realign$frame_escape_0, 0 ; CHECK: ldr w0, [x19] ; CHECK: bl foo @@ -205,17 +205,17 @@ entry: define void @vla_and_realign(i32 %n) #0 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { entry: ; CHECK-LABEL: vla_and_realign -; CHECK: mov x29, sp +; CHECK: add x29, sp, #8 ; CHECK: sub x9, sp, #48 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x1, #-2 -; CHECK: stur x1, [x29, #32] +; CHECK: stur x1, [x29, #24] ; CHECK: .set .Lvla_and_realign$frame_escape_0, 32 -; CHECK: str w0, [x29, #44] -; CHECK: ldr w8, [x29, #44] +; CHECK: str w0, [x29, #36] +; CHECK: ldr w8, [x29, #36] ; CHECK: mov x9, sp -; CHECK: str x9, [x29, #24] +; CHECK: str x9, [x29, #16] ; CHECK: str x8, [x19, #24] ; CHECK: ldr w0, [x19, #32] ; CHECK: bl foo diff --git a/llvm/test/CodeGen/AArch64/sponentry.ll b/llvm/test/CodeGen/AArch64/sponentry.ll index c308eb4084d9d..1b9bc64ef15c1 100644 --- a/llvm/test/CodeGen/AArch64/sponentry.ll +++ b/llvm/test/CodeGen/AArch64/sponentry.ll @@ -38,8 +38,8 @@ define dso_local void @foo([24 x i64]*) { ; CHECK: foo: ; CHECK: sub sp, sp, #448 -; CHECK: add x29, sp, #416 -; CHECK: add x1, x29, #32 +; CHECK: add x29, sp, #424 +; CHECK: add x1, x29, #24 ; CHECK: bl _setjmpex ; NOFP: sub sp, sp, #432 diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll index 23e2591048dff..4a60717f04a46 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll @@ -103,21 +103,21 @@ declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed declare i64* @__local_stdio_printf_options() local_unnamed_addr #4 ; CHECK-LABEL: fp -; CHECK: stp x29, x30, [sp, #-96] +; CHECK: stp x19, x20, [sp, #-96] ; CHECK: str x21, [sp, #16] -; CHECK: stp x19, x20, [sp, #32] -; CHECK: mov x29, sp -; CHECK: add x8, x29, #56 +; CHECK: stp x29, x30, [sp, #24] +; CHECK: add x29, sp, #24 +; CHECK: add x8, x29, #32 ; CHECK: mov x19, x2 ; CHECK: mov x20, x1 ; CHECK: mov x21, x0 -; CHECK: stp x3, x4, [x29, #56] -; CHECK: stp x5, x6, [x29, #72] -; CHECK: str x7, [x29, #88] -; CHECK: str x8, [x29, #24] +; CHECK: stp x3, x4, [x29, #32] +; CHECK: stp x5, x6, [x29, #48] +; CHECK: str x7, [x29, #64] +; CHECK: str x8, [x29, #16] ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] -; CHECK: add x5, x29, #56 +; CHECK: add x5, x29, #32 ; CHECK: mov x1, x21 ; CHECK: mov x2, x20 ; CHECK: orr x0, x8, #0x2 @@ -126,9 +126,9 @@ declare i64* @__local_stdio_printf_options() local_unnamed_addr #4 ; CHECK: bl __stdio_common_vsprintf ; CHECK: cmp w0, #0 ; CHECK: csinv w0, w0, wzr, ge -; CHECK: ldp x19, x20, [sp, #32] +; CHECK: ldp x29, x30, [sp, #24] ; CHECK: ldr x21, [sp, #16] -; CHECK: ldp x29, x30, [sp], #96 +; CHECK: ldp x19, x20, [sp], #96 ; CHECK: ret define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 { %4 = alloca i8*, align 8 @@ -150,26 +150,26 @@ define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 { attributes #6 = { "frame-pointer"="all" } ; CHECK-LABEL: vla -; CHECK: stp x29, x30, [sp, #-112]! -; CHECK: str x23, [sp, #16] -; CHECK: stp x21, x22, [sp, #32] -; CHECK: stp x19, x20, [sp, #48] -; CHECK: mov x29, sp -; CHECK: add x8, x29, #64 -; CHECK: str x8, [x29, #24] +; CHECK: stp x19, x20, [sp, #-112]! +; CHECK: stp x21, x22, [sp, #16] +; CHECK: str x23, [sp, #32] +; CHECK: stp x29, x30, [sp, #40] +; CHECK: add x29, sp, #40 +; CHECK: add x8, x29, #24 +; CHECK: str x8, [x29, #16] ; CHECK: mov w8, w0 ; CHECK: add x8, x8, #15 ; CHECK: lsr x15, x8, #4 ; CHECK: mov x19, x1 ; CHECK: mov [[REG2:x[0-9]+]], sp -; CHECK: stp x2, x3, [x29, #64] -; CHECK: stp x4, x5, [x29, #80] -; CHECK: stp x6, x7, [x29, #96] +; CHECK: stp x2, x3, [x29, #24] +; CHECK: stp x4, x5, [x29, #40] +; CHECK: stp x6, x7, [x29, #56] ; CHECK: bl __chkstk ; CHECK: mov x8, sp ; CHECK: sub [[REG:x[0-9]+]], x8, x15, lsl #4 ; CHECK: mov sp, [[REG]] -; CHECK: ldr [[REG3:x[0-9]+]], [x29, #24] +; CHECK: ldr [[REG3:x[0-9]+]], [x29, #16] ; CHECK: sxtw [[REG4:x[0-9]+]], w0 ; CHECK: bl __local_stdio_printf_options ; CHECK: ldr x8, [x0] @@ -181,11 +181,11 @@ attributes #6 = { "frame-pointer"="all" } ; CHECK: mov x5, [[REG3]] ; CHECK: bl __stdio_common_vsprintf ; CHECK: mov sp, [[REG2]] -; CHECK: mov sp, x29 -; CHECK: ldp x19, x20, [sp, #48] -; CHECK: ldp x21, x22, [sp, #32] -; CHECK: ldr x23, [sp, #16] -; CHECK: ldp x29, x30, [sp], #112 +; CHECK: sub sp, x29, #40 +; CHECK: ldp x29, x30, [sp, #40] +; CHECK: ldr x23, [sp, #32] +; CHECK: ldp x21, x22, [sp, #16] +; CHECK: ldp x19, x20, [sp], #112 ; CHECK: ret define void @vla(i32, i8*, ...) local_unnamed_addr { %3 = alloca i8*, align 8 @@ -212,9 +212,9 @@ declare void @llvm.stackrestore(i8*) ; CHECK-LABEL: snprintf ; CHECK-DAG: sub sp, sp, #96 -; CHECK-DAG: str x30, [sp, #16] -; CHECK-DAG: str x21, [sp, #24] -; CHECK-DAG: stp x19, x20, [sp, #32] +; CHECK-DAG: stp x19, x20, [sp, #16] +; CHECK-DAG: str x21, [sp, #32] +; CHECK-DAG: str x30, [sp, #40] ; CHECK-DAG: add x8, sp, #56 ; CHECK-DAG: mov x19, x2 ; CHECK-DAG: mov x20, x1 @@ -232,9 +232,9 @@ declare void @llvm.stackrestore(i8*) ; CHECK-DAG: mov x3, x19 ; CHECK-DAG: mov x4, xzr ; CHECK-DAG: bl __stdio_common_vsprintf -; CHECK-DAG: ldr x30, [sp, #16] -; CHECK-DAG: ldr x21, [sp, #24] -; CHECK-DAG: ldp x19, x20, [sp, #32] +; CHECK-DAG: ldr x30, [sp, #40] +; CHECK-DAG: ldr x21, [sp, #32] +; CHECK-DAG: ldp x19, x20, [sp, #16] ; CHECK-DAG: cmp w0, #0 ; CHECK-DAG: csinv w0, w0, wzr, ge ; CHECK-DAG: add sp, sp, #96 diff --git a/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir b/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir new file mode 100644 index 0000000000000..299482713fe79 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir @@ -0,0 +1,89 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: -stop-after=prologepilog | FileCheck %s +# Check where the stack variable is placed + +# CHECK: - { id: 0, name: '', type: default, offset: -4, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: local-offset: -4, debug-info-variable: '', debug-info-expression: '', +# CHECK-NEXT: debug-info-location: '' } +# CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, +# CHECK-NEXT: stack-id: default, callee-saved-register: '$x23', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, +# CHECK-NEXT: stack-id: default, callee-saved-register: '$x22', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8, +# CHECK-NEXT: stack-id: default, callee-saved-register: '$x21', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 4, name: '', type: spill-slot, offset: -40, size: 8, alignment: 8, +# CHECK-NEXT: stack-id: default, callee-saved-register: '$x20', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 5, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, +# CHECK-NEXT: stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -6 :: (store 8 into %stack.4), (store 8 into %stack.5) +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -48 +# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 2 :: (store 8 into %stack.2), (store 8 into %stack.3) +# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 22, 16 +# CHECK-NEXT: frame-setup STRXui killed $x23, $sp, 4 :: (store 8 into %stack.1) +# CHECK-NEXT: frame-setup SEH_SaveReg 23, 32 +# CHECK-NEXT: frame-setup SEH_PrologEnd + + +... +--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x0, $x23, $x21, $x22, $x19, $x20 + + renamable $x8 = ADDXri %stack.0, 0, 0 + $x19 = ADDXrr $x0, $x8 + $x20 = ADDXrr $x19, $x0 + $x21 = ADDXrr $x20, killed $x19 + $x22 = ADDXrr $x21, killed $x20 + $x23 = ADDXrr $x22, killed $x21 + $x0 = ADDXrr $x0, killed $x23 + + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame0.mir b/llvm/test/CodeGen/AArch64/wineh-frame0.mir index c7e9e19fca19e..9abc8699fd951 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame0.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame0.mir @@ -2,28 +2,28 @@ # RUN: -stop-after=prologepilog | FileCheck %s # Check save_regp_x, save_regp -# CHECK: early-clobber $sp = frame-setup STPXpre killed $x27, killed $x28, $sp, -10 -# CHECK-NEXT: frame-setup SEH_SaveRegP_X 27, 28, -80 -# CHECK-NEXT: frame-setup STPXi killed $x25, killed $x26, $sp, 2 -# CHECK-NEXT: frame-setup SEH_SaveRegP 25, 26, 16 +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -10 +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -80 +# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 2 +# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 22, 16 # CHECK-NEXT: frame-setup STPXi killed $x23, killed $x24, $sp, 4 # CHECK-NEXT: frame-setup SEH_SaveRegP 23, 24, 32 -# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 6 -# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 22, 48 -# CHECK-NEXT: frame-setup STPXi killed $x19, killed $x20, $sp, 8 -# CHECK-NEXT: frame-setup SEH_SaveRegP 19, 20, 64 +# CHECK-NEXT: frame-setup STPXi killed $x25, killed $x26, $sp, 6 +# CHECK-NEXT: frame-setup SEH_SaveRegP 25, 26, 48 +# CHECK-NEXT: frame-setup STPXi killed $x27, killed $x28, $sp, 8 +# CHECK-NEXT: frame-setup SEH_SaveRegP 27, 28, 64 # CHECK-NEXT: frame-setup SEH_PrologEnd # CHECK: frame-destroy SEH_EpilogStart -# CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 8 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 64 -# CHECK-NEXT: $x21, $x22 = frame-destroy LDPXi $sp, 6 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 22, 48 +# CHECK-NEXT: $x27, $x28 = frame-destroy LDPXi $sp, 8 +# CHECK-NEXT: frame-destroy SEH_SaveRegP 27, 28, 64 +# CHECK-NEXT: $x25, $x26 = frame-destroy LDPXi $sp, 6 +# CHECK-NEXT: frame-destroy SEH_SaveRegP 25, 26, 48 # CHECK-NEXT: $x23, $x24 = frame-destroy LDPXi $sp, 4 # CHECK-NEXT: frame-destroy SEH_SaveRegP 23, 24, 32 -# CHECK-NEXT: $x25, $x26 = frame-destroy LDPXi $sp, 2 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 25, 26, 16 -# CHECK-NEXT: early-clobber $sp, $x27, $x28 = frame-destroy LDPXpost $sp, 10 -# CHECK-NEXT: frame-destroy SEH_SaveRegP_X 27, 28, -80 +# CHECK-NEXT: $x21, $x22 = frame-destroy LDPXi $sp, 2 +# CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 22, 16 +# CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 10 +# CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -80 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh-frame1.mir b/llvm/test/CodeGen/AArch64/wineh-frame1.mir index 3604f610c95a6..1d4f26466d9d2 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame1.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame1.mir @@ -2,36 +2,36 @@ # RUN: -stop-after=prologepilog | FileCheck %s # Check save_fregp_x, save_fregp -# CHECK: early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 -# CHECK-NEXT: frame-setup SEH_SaveFRegP_X 10, 11, -112 -# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 2 -# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 16 -# CHECK-NEXT: frame-setup STPXi killed $x27, killed $x28, $sp, 4 -# CHECK-NEXT: frame-setup SEH_SaveRegP 27, 28, 32 +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -14 +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -112 +# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 2 +# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 22, 16 +# CHECK-NEXT: frame-setup STPXi killed $x23, killed $x24, $sp, 4 +# CHECK-NEXT: frame-setup SEH_SaveRegP 23, 24, 32 # CHECK-NEXT: frame-setup STPXi killed $x25, killed $x26, $sp, 6 # CHECK-NEXT: frame-setup SEH_SaveRegP 25, 26, 48 -# CHECK-NEXT: frame-setup STPXi killed $x23, killed $x24, $sp, 8 -# CHECK-NEXT: frame-setup SEH_SaveRegP 23, 24, 64 -# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 10 -# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 22, 80 -# CHECK-NEXT: frame-setup STPXi killed $x19, killed $x20, $sp, 12 -# CHECK-NEXT: frame-setup SEH_SaveRegP 19, 20, 96 +# CHECK-NEXT: frame-setup STPXi killed $x27, killed $x28, $sp, 8 +# CHECK-NEXT: frame-setup SEH_SaveRegP 27, 28, 64 +# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 10 +# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 80 +# CHECK-NEXT: frame-setup STPDi killed $d10, killed $d11, $sp, 12 +# CHECK-NEXT: frame-setup SEH_SaveFRegP 10, 11, 96 # CHECK-NEXT: frame-setup SEH_PrologEnd # CHECK: frame-destroy SEH_EpilogStart -# CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 12 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 96 -# CHECK-NEXT: $x21, $x22 = frame-destroy LDPXi $sp, 10 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 22, 80 -# CHECK-NEXT: $x23, $x24 = frame-destroy LDPXi $sp, 8 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 23, 24, 64 +# CHECK-NEXT: $d10, $d11 = frame-destroy LDPDi $sp, 12 +# CHECK-NEXT: frame-destroy SEH_SaveFRegP 10, 11, 96 +# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 10 +# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 80 +# CHECK-NEXT: $x27, $x28 = frame-destroy LDPXi $sp, 8 +# CHECK-NEXT: frame-destroy SEH_SaveRegP 27, 28, 64 # CHECK-NEXT: $x25, $x26 = frame-destroy LDPXi $sp, 6 # CHECK-NEXT: frame-destroy SEH_SaveRegP 25, 26, 48 -# CHECK-NEXT: $x27, $x28 = frame-destroy LDPXi $sp, 4 -# CHECK-NEXT: frame-destroy SEH_SaveRegP 27, 28, 32 -# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 2 -# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 16 -# CHECK-NEXT: early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 -# CHECK-NEXT: frame-destroy SEH_SaveFRegP_X 10, 11, -112 +# CHECK-NEXT: $x23, $x24 = frame-destroy LDPXi $sp, 4 +# CHECK-NEXT: frame-destroy SEH_SaveRegP 23, 24, 32 +# CHECK-NEXT: $x21, $x22 = frame-destroy LDPXi $sp, 2 +# CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 22, 16 +# CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 14 +# CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -112 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame2.mir b/llvm/test/CodeGen/AArch64/wineh-frame2.mir index be7598ce49426..cdfa87916b74e 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame2.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame2.mir @@ -2,24 +2,24 @@ # RUN: -stop-after=prologepilog | FileCheck %s # Check save_freg_x, save_frep, save_reg -# CHECK: early-clobber $sp = frame-setup STRDpre killed $d12, $sp, -48 -# CHECK-NEXT: frame-setup SEH_SaveFReg_X 12, -48 -# CHECK-NEXT: frame-setup STPDi killed $d10, killed $d11, $sp, 1 -# CHECK-NEXT: frame-setup SEH_SaveFRegP 10, 11, 8 -# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 3 -# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 24 -# CHECK-NEXT: frame-setup STRXui killed $x19, $sp, 5 -# CHECK-NEXT: frame-setup SEH_SaveReg 19, 40 +# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -48 +# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -48 +# CHECK-NEXT: frame-setup STPDi killed $d8, killed $d9, $sp, 1 +# CHECK-NEXT: frame-setup SEH_SaveFRegP 8, 9, 8 +# CHECK-NEXT: frame-setup STPDi killed $d10, killed $d11, $sp, 3 +# CHECK-NEXT: frame-setup SEH_SaveFRegP 10, 11, 24 +# CHECK-NEXT: frame-setup STRDui killed $d12, $sp, 5 +# CHECK-NEXT: frame-setup SEH_SaveFReg 12, 40 # CHECK-NEXT: frame-setup SEH_PrologEnd # CHECK: frame-destroy SEH_EpilogStart -# CHECK-NEXT: $x19 = frame-destroy LDRXui $sp, 5 -# CHECK-NEXT: frame-destroy SEH_SaveReg 19, 40 -# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 3 -# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 24 -# CHECK-NEXT: $d10, $d11 = frame-destroy LDPDi $sp, 1 -# CHECK-NEXT: frame-destroy SEH_SaveFRegP 10, 11, 8 -# CHECK-NEXT: early-clobber $sp, $d12 = frame-destroy LDRDpost $sp, 48 -# CHECK-NEXT: frame-destroy SEH_SaveFReg_X 12, -48 +# CHECK-NEXT: $d12 = frame-destroy LDRDui $sp, 5 +# CHECK-NEXT: frame-destroy SEH_SaveFReg 12, 40 +# CHECK-NEXT: $d10, $d11 = frame-destroy LDPDi $sp, 3 +# CHECK-NEXT: frame-destroy SEH_SaveFRegP 10, 11, 24 +# CHECK-NEXT: $d8, $d9 = frame-destroy LDPDi $sp, 1 +# CHECK-NEXT: frame-destroy SEH_SaveFRegP 8, 9, 8 +# CHECK-NEXT: early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 48 +# CHECK-NEXT: frame-destroy SEH_SaveReg_X 19, -48 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame3.mir b/llvm/test/CodeGen/AArch64/wineh-frame3.mir index 542192e1149ff..30d804eeb8e54 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame3.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame3.mir @@ -2,16 +2,16 @@ # RUN: -stop-after=prologepilog | FileCheck %s # Check save_reg_x, save_reg -# CHECK: early-clobber $sp = frame-setup STRXpre killed $x22, $sp, -16 -# CHECK-NEXT: frame-setup SEH_SaveReg_X 22, -16 -# CHECK-NEXT: frame-setup STRXui killed $x19, $sp, 1 -# CHECK-NEXT: frame-setup SEH_SaveReg 19, 8 +# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16 +# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -16 +# CHECK-NEXT: frame-setup STRXui killed $x22, $sp, 1 +# CHECK-NEXT: frame-setup SEH_SaveReg 22, 8 # CHECK-NEXT: frame-setup SEH_PrologEnd # CHECK: frame-destroy SEH_EpilogStart -# CHECK-NEXT: $x19 = frame-destroy LDRXui $sp, 1 -# CHECK-NEXT: frame-destroy SEH_SaveReg 19, 8 -# CHECK-NEXT: early-clobber $sp, $x22 = frame-destroy LDRXpost $sp, 16 -# CHECK-NEXT: frame-destroy SEH_SaveReg_X 22, -16 +# CHECK-NEXT: $x22 = frame-destroy LDRXui $sp, 1 +# CHECK-NEXT: frame-destroy SEH_SaveReg 22, 8 +# CHECK-NEXT: early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: frame-destroy SEH_SaveReg_X 19, -16 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame4.mir b/llvm/test/CodeGen/AArch64/wineh-frame4.mir index 0f525450908fc..e9135f1bcf599 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame4.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame4.mir @@ -2,16 +2,16 @@ # RUN: -stop-after=prologepilog | FileCheck %s # Check save_freg_x, save_freg -# CHECK: early-clobber $sp = frame-setup STRDpre killed $d10, $sp, -16 -# CHECK-NEXT: frame-setup SEH_SaveFReg_X 10, -16 -# CHECK-NEXT: frame-setup STRDui killed $d8, $sp, 1 :: (store 8 into %stack.0) -# CHECK-NEXT: frame-setup SEH_SaveFReg 8, 8 +# CHECK: early-clobber $sp = frame-setup STRDpre killed $d8, $sp, -16 +# CHECK-NEXT: frame-setup SEH_SaveFReg_X 8, -16 +# CHECK-NEXT: frame-setup STRDui killed $d10, $sp, 1 :: (store 8 into %stack.0) +# CHECK-NEXT: frame-setup SEH_SaveFReg 10, 8 # CHECK-NEXT: frame-setup SEH_PrologEnd # CHECK: frame-destroy SEH_EpilogStart -# CHECK-NEXT: $d8 = frame-destroy LDRDui $sp, 1 :: (load 8 from %stack.0) -# CHECK-NEXT: frame-destroy SEH_SaveFReg 8, 8 -# CHECK-NEXT: early-clobber $sp, $d10 = frame-destroy LDRDpost $sp, 16 :: (load 8 from %stack.1) -# CHECK-NEXT: frame-destroy SEH_SaveFReg_X 10, -16 +# CHECK-NEXT: $d10 = frame-destroy LDRDui $sp, 1 :: (load 8 from %stack.0) +# CHECK-NEXT: frame-destroy SEH_SaveFReg 10, 8 +# CHECK-NEXT: early-clobber $sp, $d8 = frame-destroy LDRDpost $sp, 16 :: (load 8 from %stack.1) +# CHECK-NEXT: frame-destroy SEH_SaveFReg_X 8, -16 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame5.mir b/llvm/test/CodeGen/AArch64/wineh-frame5.mir index 994fa80954fa4..b1708db24aa12 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame5.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame5.mir @@ -3,10 +3,10 @@ # Check multiple epilogues, save_reg, save_reg_x. # CHECK-LABEL: bb.0.entry: -# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -4 -# CHECK-NEXT: frame-setup SEH_SaveFPLR_X -32 -# CHECK-NEXT: frame-setup STRXui killed $x19, $sp, 2 -# CHECK-NEXT: frame-setup SEH_SaveReg 19, 16 +# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -32 +# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -32 +# CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 1 +# CHECK-NEXT: frame-setup SEH_SaveFPLR 8 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 496, 0 # CHECK-NEXT: frame-setup SEH_StackAlloc 496 # CHECK-NEXT: frame-setup SEH_PrologEnd @@ -15,10 +15,10 @@ # CHECK: frame-destroy SEH_EpilogStart # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 496, 0 # CHECK-NEXT: frame-destroy SEH_StackAlloc 496 -# CHECK-NEXT: $x19 = frame-destroy LDRXui $sp, 2 -# CHECK-NEXT: frame-destroy SEH_SaveReg 19, 16 -# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 4 -# CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -32 +# CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 1 +# CHECK-NEXT: frame-destroy SEH_SaveFPLR 8 +# CHECK-NEXT: early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 32 +# CHECK-NEXT: frame-destroy SEH_SaveReg_X 19, -32 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: TCRETURNdi @"?func2@@YAHXZ", 0, csr_aarch64_aapcs, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll index e81932112613d..85b3631c459f7 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll @@ -9,12 +9,12 @@ ; it shouldn't access the parent's frame via sp, and the prologue and ; epilogue should be symmetrical. ; CHECK-LABEL: "?catch$2@?0??a@@YAXXZ@4HA": -; CHECK: stp x29, x30, [sp, #-32]! -; CHECK-NEXT: .seh_save_fplr_x 32 -; CHECK-NEXT: str x28, [sp, #16] -; CHECK-NEXT: .seh_save_reg x28, 16 -; CHECK-NEXT: str x19, [sp, #24] -; CHECK-NEXT: .seh_save_reg x19, 24 +; CHECK: str x19, [sp, #-32]! +; CHECK-NEXT: .seh_save_reg_x x19, 32 +; CHECK-NEXT: str x28, [sp, #8] +; CHECK-NEXT: .seh_save_reg x28, 8 +; CHECK-NEXT: stp x29, x30, [sp, #16] +; CHECK-NEXT: .seh_save_fplr 16 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: add x0, x19, #0 ; CHECK-NEXT: mov w1, wzr @@ -22,12 +22,12 @@ ; CHECK-NEXT: adrp x0, .LBB0_1 ; CHECK-NEXT: add x0, x0, .LBB0_1 ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x19, [sp, #24] -; CHECK-NEXT: .seh_save_reg x19, 24 -; CHECK-NEXT: ldr x28, [sp, #16] -; CHECK-NEXT: .seh_save_reg x28, 16 -; CHECK-NEXT: ldp x29, x30, [sp], #32 -; CHECK-NEXT: .seh_save_fplr_x 32 +; CHECK-NEXT: ldp x29, x30, [sp, #16] +; CHECK-NEXT: .seh_save_fplr 16 +; CHECK-NEXT: ldr x28, [sp, #8] +; CHECK-NEXT: .seh_save_reg x28, 8 +; CHECK-NEXT: ldr x19, [sp], #32 +; CHECK-NEXT: .seh_save_reg_x x19, 32 ; CHECK-NEXT: .seh_endepilogue ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll index 73909825d377d..8bf5aa33e24a4 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll @@ -15,15 +15,15 @@ ; We check this offset in the table later on. ; CHECK-LABEL: "?func@@YAHXZ": -; CHECK: stp x29, x30, [sp, #-64]! -; CHECK: str x28, [sp, #16] -; CHECK: str x21, [sp, #24] -; CHECK: stp x19, x20, [sp, #32] -; CHECK: mov x29, sp +; CHECK: stp x19, x20, [sp, #-64]! +; CHECK: str x21, [sp, #16] +; CHECK: str x28, [sp, #24] +; CHECK: stp x29, x30, [sp, #32] +; CHECK: add x29, sp, #32 ; CHECK: sub sp, sp, #624 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x29, #48] +; CHECK: stur x0, [x29, #16] ; Now check that x is stored at fp - 20. We check that this is the same ; location accessed from the funclet to retrieve x. @@ -47,10 +47,10 @@ ; CHECK-LABEL: "?catch$2@?0??func@@YAHXZ@4HA": ; Check that the stack space is allocated only for the callee saved registers. -; CHECK: stp x29, x30, [sp, #-48]! -; CHECK: str x28, [sp, #16] -; CHECK: str x21, [sp, #24] -; CHECK: stp x19, x20, [sp, #32] +; CHECK: stp x19, x20, [sp, #-48]! +; CHECK: str x21, [sp, #16] +; CHECK: str x28, [sp, #24] +; CHECK: stp x29, x30, [sp, #32] ; CHECK: add x20, x19, #12 ; Check that there are no further stack updates. @@ -87,18 +87,18 @@ ; UNWIND: Prologue [ ; UNWIND-NEXT: ; nop ; UNWIND-NEXT: ; sub sp, #624 -; UNWIND-NEXT: ; mov fp, sp -; UNWIND-NEXT: ; stp x19, x20, [sp, #32] -; UNWIND-NEXT: ; str x21, [sp, #24] -; UNWIND-NEXT: ; str x28, [sp, #16] -; UNWIND-NEXT: ; stp x29, x30, [sp, #-64]! +; UNWIND-NEXT: ; add fp, sp, #32 +; UNWIND-NEXT: ; stp x29, x30, [sp, #32] +; UNWIND-NEXT: ; str x28, [sp, #24] +; UNWIND-NEXT: ; str x21, [sp, #16] +; UNWIND-NEXT: ; stp x19, x20, [sp, #-64]! ; UNWIND-NEXT: ; end ; UNWIND: Function: ?catch$2@?0??func@@YAHXZ@4HA ; UNWIND: Prologue [ -; UNWIND-NEXT: ; stp x19, x20, [sp, #32] -; UNWIND-NEXT: ; str x21, [sp, #24] -; UNWIND-NEXT: ; str x28, [sp, #16] -; UNWIND-NEXT: ; stp x29, x30, [sp, #-48]! +; UNWIND-NEXT: ; stp x29, x30, [sp, #32] +; UNWIND-NEXT: ; str x28, [sp, #24] +; UNWIND-NEXT: ; str x21, [sp, #16] +; UNWIND-NEXT: ; stp x19, x20, [sp, #-48]! ; UNWIND-NEXT: ; end target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir b/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir index 4f45b349c5fab..51bb8ff0d393f 100644 --- a/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir +++ b/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir @@ -9,7 +9,7 @@ # The same test gets shrink wrapped on Linux ARM64. # WIN64-LABEL: bb.0.entry: -# WIN64: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -4 +# WIN64: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -32 # WIN64-LABEL: bb.1: # WIN64-LABEL: bb.2.if.then: From 890af2f003c83349ff5917d80023b8b796f76489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 23 Sep 2020 14:26:45 +0300 Subject: [PATCH 474/544] [AArch64] Allow pairing lr with other GPRs for WinCFI This saves one instruction per prologue/epilogue for any function with an odd number of callee-saved GPRs, but more importantly, allows such functions to match the packed unwind format. Differential Revision: https://reviews.llvm.org/D88699 --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 8 ++ .../Target/AArch64/AArch64FrameLowering.cpp | 28 ++++--- llvm/test/CodeGen/AArch64/win64_vararg.ll | 6 +- .../CodeGen/AArch64/wineh-save-lrpair1.mir | 74 +++++++++++++++++++ .../CodeGen/AArch64/wineh-save-lrpair2.mir | 65 ++++++++++++++++ .../CodeGen/AArch64/wineh-save-lrpair3.mir | 68 +++++++++++++++++ 6 files changed, 236 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/wineh-save-lrpair1.mir create mode 100644 llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir create mode 100644 llvm/test/CodeGen/AArch64/wineh-save-lrpair3.mir diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 7c4a8555a1a12..0904e1c40c816 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1314,6 +1314,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { return; case AArch64::SEH_SaveRegP: + if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 && + MI->getOperand(0).getImm() <= 28) { + assert((MI->getOperand(0).getImm() - 19) % 2 == 0 && + "Register paired with LR must be odd"); + TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(), + MI->getOperand(2).getImm()); + return; + } assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) && "Non-consecutive registers not allowed for save_regp"); TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(), diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index e77899fa8d7a1..d33ebdd330c06 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1988,21 +1988,28 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, - bool NeedsWinCFI) { + bool NeedsWinCFI, bool IsFirst) { // If we are generating register pairs for a Windows function that requires // EH support, then pair consecutive registers only. There are no unwind // opcodes for saves/restores of non-consectuve register pairs. - // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x. + // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, + // save_lrpair. // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling - // TODO: LR can be paired with any register. We don't support this yet in - // the MCLayer. We need to add support for the save_lrpair unwind code. if (Reg2 == AArch64::FP) return true; if (!NeedsWinCFI) return false; if (Reg2 == Reg1 + 1) return false; + // If pairing a GPR with LR, the pair can be described by the save_lrpair + // opcode. If this is the first register pair, it would end up with a + // predecrement, but there's no save_lrpair_x opcode, so we can only do this + // if LR is paired with something else than the first register. + // The save_lrpair opcode requires the first register to be an odd one. + if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && + (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) + return false; return true; } @@ -2011,9 +2018,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, /// LR and FP need to be allocated together when the frame needs to save /// the frame-record. This means any other register pairing with LR is invalid. static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, - bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord) { + bool UsesWinAAPCS, bool NeedsWinCFI, + bool NeedsFrameRecord, bool IsFirst) { if (UsesWinAAPCS) - return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI); + return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst); // If we need to store the frame record, don't pair any register // with LR other than FP. @@ -2112,16 +2120,18 @@ static void computeCalleeSaveRegisterPairs( // Add the next reg to the pair if it is in the same register class. if (unsigned(i + RegInc) < Count) { unsigned NextReg = CSI[i + RegInc].getReg(); + bool IsFirst = i == FirstReg; switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, NeedsWinCFI, - NeedsFrameRecord)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, + NeedsWinCFI, NeedsFrameRecord, IsFirst)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: if (AArch64::FPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + IsFirst)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR128: diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll index 4a60717f04a46..9492ab3c8c02e 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll @@ -213,8 +213,7 @@ declare void @llvm.stackrestore(i8*) ; CHECK-LABEL: snprintf ; CHECK-DAG: sub sp, sp, #96 ; CHECK-DAG: stp x19, x20, [sp, #16] -; CHECK-DAG: str x21, [sp, #32] -; CHECK-DAG: str x30, [sp, #40] +; CHECK-DAG: stp x21, x30, [sp, #32] ; CHECK-DAG: add x8, sp, #56 ; CHECK-DAG: mov x19, x2 ; CHECK-DAG: mov x20, x1 @@ -232,8 +231,7 @@ declare void @llvm.stackrestore(i8*) ; CHECK-DAG: mov x3, x19 ; CHECK-DAG: mov x4, xzr ; CHECK-DAG: bl __stdio_common_vsprintf -; CHECK-DAG: ldr x30, [sp, #40] -; CHECK-DAG: ldr x21, [sp, #32] +; CHECK-DAG: ldp x21, x30, [sp, #32] ; CHECK-DAG: ldp x19, x20, [sp, #16] ; CHECK-DAG: cmp w0, #0 ; CHECK-DAG: csinv w0, w0, wzr, ge diff --git a/llvm/test/CodeGen/AArch64/wineh-save-lrpair1.mir b/llvm/test/CodeGen/AArch64/wineh-save-lrpair1.mir new file mode 100644 index 0000000000000..7959ad611ed40 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-save-lrpair1.mir @@ -0,0 +1,74 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: -stop-after=prologepilog | FileCheck %s +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: | FileCheck --check-prefix=ASM %s + +# Check that an odd callee-saved GPR is paired with lr + +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -4 +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -32 +# CHECK-NEXT: frame-setup STPXi killed $x21, killed $lr, $sp, 2 +# CHECK-NEXT: frame-setup SEH_SaveRegP 21, 30, 16 +# CHECK-NEXT: frame-setup SEH_PrologEnd + +# ASM: stp x19, x20, [sp, #-32]! +# ASM-NEXT: .seh_save_regp_x x19, 32 +# ASM-NEXT: stp x21, x30, [sp, #16] +# ASM-NEXT: .seh_save_lrpair x21, 16 +# ASM-NEXT: .seh_endprologue + +--- | + + define dso_local i32 @func(i32 %a) { ret i32 %a } + declare dso_local i32 @other() + +... +--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x21, $x19, $x20 + + BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + $x19 = ADDXrr $x0, $x0 + $x20 = ADDXrr $x19, $x0 + $x21 = ADDXrr $x20, killed $x19 + $x0 = ADDXrr $x0, killed $x21 + + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir b/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir new file mode 100644 index 0000000000000..2ec296f7b9e7e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir @@ -0,0 +1,65 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: -stop-after=prologepilog | FileCheck %s + +# Check that lr isn't paired with a GPR if it's the first pair, as +# that can't be described as a SEH opcode if combined with predecrement. + +# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16 +# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -16 +# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 1 +# CHECK-NEXT: frame-setup SEH_SaveReg 30, 8 +# CHECK-NEXT: frame-setup SEH_PrologEnd + +--- | + + define dso_local i32 @func(i32 %a) { ret i32 %a } + declare dso_local i32 @other() + +... +--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x19 + + BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + $x19 = ADDXrr $x0, $x0 + $x0 = ADDXrr $x0, killed $x19 + + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/wineh-save-lrpair3.mir b/llvm/test/CodeGen/AArch64/wineh-save-lrpair3.mir new file mode 100644 index 0000000000000..e45cf9ff3e4c8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-save-lrpair3.mir @@ -0,0 +1,68 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: -stop-after=prologepilog | FileCheck %s + +# Check that an unpaired register that is even isn't paired with lr. + +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -4 +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -32 +# CHECK-NEXT: frame-setup STRXui killed $x22, $sp, 2 +# CHECK-NEXT: frame-setup SEH_SaveReg 22, 16 +# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 3 +# CHECK-NEXT: frame-setup SEH_SaveReg 30, 24 +# CHECK-NEXT: frame-setup SEH_PrologEnd + +--- | + + define dso_local i32 @func(i32 %a) { ret i32 %a } + declare dso_local i32 @other() + +... +--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x22, $x19, $x20 + + BL @other, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 + $x19 = ADDXrr $x0, $x0 + $x20 = ADDXrr $x19, $x0 + $x22 = ADDXrr $x20, killed $x19 + $x0 = ADDXrr $x0, killed $x22 + + RET_ReallyLR + +... From 7d07405761aec8434a0cdb1c5644823a394f7def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 23 Sep 2020 15:00:53 +0300 Subject: [PATCH 475/544] [AArch64] Prefer prologues with sp adjustments merged into stp/ldp for WinCFI, if optimizing for size This makes the prologue match the windows canonical layout, for cases without a frame pointer. This can potentially be a slower (a longer dependency chain of the sp register, and potentially one arithmetic operation more on some cores), but gives notable size improvements. The previous two commits shrinks a 166 KB xdata section by 49 KB, and if the change from this commit is enabled, it shrinks the xdata section by another 25 KB. In total, since the start of the recent arm64 unwind info cleanups and optimizations (since before commit 37ef743cbf3), the xdata+pdata sections of the same test DLL has shrunk from 407 KB in total originally, to 163 KB now. Differential Revision: https://reviews.llvm.org/D88701 --- .../Target/AArch64/AArch64FrameLowering.cpp | 24 +++++-- .../AArch64/wineh-frame-predecrement.mir | 70 +++++++++++++++++++ 2 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d33ebdd330c06..0d52b00d54ba9 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -579,6 +579,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF, !F.hasFnAttribute("no-stack-arg-probe"); } +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, uint64_t StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo(); @@ -589,6 +595,18 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (AFI->getLocalStackSize() == 0) return false; + // For WinCFI, if optimizing for size, prefer to not combine the stack bump + // (to force a stp with predecrement) to match the packed unwind format, + // provided that there actually are any callee saved registers to merge the + // decrement with. + // This is potentially marginally slower, but allows using the packed + // unwind format for functions that both have a local area and callee saved + // registers. Using the packed unwind format notably reduces the size of + // the unwind info. + if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && + MF.getFunction().hasOptSize()) + return false; + // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) @@ -982,12 +1000,6 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB, // } -static bool needsWinCFI(const MachineFunction &MF) { - const Function &F = MF.getFunction(); - return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - F.needsUnwindTableEntry(); -} - static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } diff --git a/llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir b/llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir new file mode 100644 index 0000000000000..1bed8f6b547a2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-frame-predecrement.mir @@ -0,0 +1,70 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \ +# RUN: -stop-after=prologepilog | FileCheck %s + +# Check that the callee-saved registers are saved starting with a STP +# with predecrement, followed by a separate stack adjustment later, +# if the optsize attribute is set. + +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -2 +# CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -16 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-NEXT: frame-setup SEH_StackAlloc 16 +# CHECK-NEXT: frame-setup SEH_PrologEnd + +--- | + + define dso_local i32 @func(i32 %a) optsize { ret i32 %a } + +... +--- +name: func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 4 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x19, $x20 + + renamable $x8 = ADDXri %stack.0, 0, 0 + $x19 = ADDXrr $x0, $x8 + $x20 = ADDXrr $x19, $x0 + $x0 = ADDXrr $x0, killed $x20 + + RET_ReallyLR + +... From ef72591de971ee22dd47a949583fd1be38ba0d1b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 3 Oct 2020 20:41:41 +0100 Subject: [PATCH 476/544] [LV] Add another test case with unsinkable first-order recurrences. --- .../first-order-recurrence-complex.ll | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 3f9dc805f8193..1407abce979e7 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -267,3 +267,36 @@ bb13: ; preds = %bb13, %bb bb74: ; preds = %bb13 ret void } + +; Users that are phi nodes cannot be sunk. +define void @cannot_sink_phi(i32* %ptr) { +; CHECK-LABEL: define void @cannot_sink_phi( +; CHECK-NOT: vector.body +entry: + br label %loop.header + +loop.header: ; preds = %if.end128, %for.cond108.preheader + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ] + %for = phi i32 [ 0, %entry ], [ %for.next, %loop.latch ] + %c.1 = icmp ult i64 %iv, 500 + br i1 %c.1, label %if.truebb, label %if.falsebb + +if.truebb: ; preds = %for.body114 + br label %loop.latch + +if.falsebb: ; preds = %for.body114 + br label %loop.latch + +loop.latch: ; preds = %if.then122, %for.body114.if.end128_crit_edge + %first_time.1 = phi i32 [ 20, %if.truebb ], [ %for, %if.falsebb ] + %c.2 = icmp ult i64 %iv, 800 + %for.next = select i1 %c.2, i32 30, i32 %first_time.1 + %ptr.idx = getelementptr i32, i32* %ptr, i64 %iv + store i32 %for.next, i32* %ptr.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop.header + +exit: + ret void +} From 1038ce4b6bf11c9615e60b503bdb253a000a6d90 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 3 Oct 2020 22:32:46 +0300 Subject: [PATCH 477/544] [NFC][PhaseOrdering] Add a test showing new inttoptr casts after SROA due to InstCombine (PR47592) We could either try to make SROA more picky to the new type and/or prevent InstCombine from creating the original problem (converting load-stores to operate on ints), and/or make InstCombine recover the situation by cleaning up all that cruft. --- .../instcombine-sroa-inttoptr.ll | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll new file mode 100644 index 0000000000000..6de0282e9448a --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll @@ -0,0 +1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O3 -S | FileCheck %s --check-prefixes=CHECK,OLDPM +; RUN: opt < %s -passes='default' -aa-pipeline=default -S | FileCheck %s --check-prefixes=CHECK,NEWPM + +; This is based on the following most basic C++ code: +; +; struct S { +; int* data; +; int x, y, z; +; }; +; +; S gen(S a) { +; S b; +; b.data = a.data; +; return b; +; } +; +; void escape0(S); +; +; int* foo(S a) { +; S b = gen(a); +; escape0(b); +; return b.data; +; } +; +; int cond(); +; void sync0(); +; void sync1(); +; void escape0(int*); +; void escape1(int*); +; +; int* bar(S a) { +; S b = gen(a); +; if(cond()) { +; sync0(); +; escape0(b.data); +; } else { +; sync1(); +; escape1(b.data); +; } +; return b.data; +; } +; +; There are no inttoptr casts in the original source code, nor should there be any in the optimized IR. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%0 = type { i32*, i32, i32, i32 } + +define dso_local void @_Z3gen1S(%0* noalias sret align 8 %arg, %0* byval(%0) align 8 %arg1) { +; CHECK-LABEL: @_Z3gen1S( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG1:%.*]] to i64* +; CHECK-NEXT: [[I21:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: store i64 [[I21]], i64* [[TMP1]], align 8 +; CHECK-NEXT: ret void +; +bb: + %i = getelementptr inbounds %0, %0* %arg1, i32 0, i32 0 + %i2 = load i32*, i32** %i, align 8 + %i3 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0 + store i32* %i2, i32** %i3, align 8 + ret void +} + +define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) { +; CHECK-LABEL: @_Z3foo1S( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 +; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I5_SROA_CAST]], align 8 +; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_CAST:%.*]] = bitcast %0* [[I2]] to i64* +; CHECK-NEXT: store i64 [[I1_SROA_0_0_COPYLOAD]], i64* [[I_SROA_0_0_I6_SROA_CAST]], align 8 +; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]]) +; CHECK-NEXT: [[TMP0]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32* +; CHECK-NEXT: ret i32* [[TMP0]] +; +bb: + %i = alloca %0, align 8 + %i1 = alloca %0, align 8 + %i2 = alloca %0, align 8 + %i3 = bitcast %0* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 24, i8* %i3) + %i4 = bitcast %0* %i1 to i8* + %i5 = bitcast %0* %arg to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %i4, i8* align 8 %i5, i64 24, i1 false) + call void @_Z3gen1S(%0* sret align 8 %i, %0* byval(%0) align 8 %i1) + %i6 = bitcast %0* %i2 to i8* + %i7 = bitcast %0* %i to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %i6, i8* align 8 %i7, i64 24, i1 false) + call void @_Z7escape01S(%0* byval(%0) align 8 %i2) + %i8 = getelementptr inbounds %0, %0* %i, i32 0, i32 0 + %i9 = load i32*, i32** %i8, align 8 + %i10 = bitcast %0* %i to i8* + call void @llvm.lifetime.end.p0i8(i64 24, i8* %i10) + ret i32* %i9 +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +declare dso_local void @_Z7escape01S(%0* byval(%0) align 8) + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) { +; CHECK-LABEL: @_Z3bar1S( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I4_SROA_CAST]], align 8 +; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv() +; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0 +; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: tail call void @_Z5sync0v() +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32* +; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP0]]) +; CHECK-NEXT: br label [[BB13:%.*]] +; CHECK: bb10: +; CHECK-NEXT: tail call void @_Z5sync1v() +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32* +; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]]) +; CHECK-NEXT: br label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: [[DOTPRE_PHI:%.*]] = phi i32* [ [[TMP1]], [[BB10]] ], [ [[TMP0]], [[BB7]] ] +; CHECK-NEXT: ret i32* [[DOTPRE_PHI]] +; +bb: + %i = alloca %0, align 8 + %i1 = alloca %0, align 8 + %i2 = bitcast %0* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 24, i8* %i2) + %i3 = bitcast %0* %i1 to i8* + %i4 = bitcast %0* %arg to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %i3, i8* align 8 %i4, i64 24, i1 false) + call void @_Z3gen1S(%0* sret align 8 %i, %0* byval(%0) align 8 %i1) + %i5 = call i32 @_Z4condv() + %i6 = icmp ne i32 %i5, 0 + br i1 %i6, label %bb7, label %bb10 + +bb7: + call void @_Z5sync0v() + %i8 = getelementptr inbounds %0, %0* %i, i32 0, i32 0 + %i9 = load i32*, i32** %i8, align 8 + call void @_Z7escape0Pi(i32* %i9) + br label %bb13 + +bb10: + call void @_Z5sync1v() + %i11 = getelementptr inbounds %0, %0* %i, i32 0, i32 0 + %i12 = load i32*, i32** %i11, align 8 + call void @_Z7escape1Pi(i32* %i12) + br label %bb13 + +bb13: + %i14 = getelementptr inbounds %0, %0* %i, i32 0, i32 0 + %i15 = load i32*, i32** %i14, align 8 + %i16 = bitcast %0* %i to i8* + call void @llvm.lifetime.end.p0i8(i64 24, i8* %i16) + ret i32* %i15 +} + +declare dso_local i32 @_Z4condv() +declare dso_local void @_Z5sync0v() +declare dso_local void @_Z7escape0Pi(i32*) +declare dso_local void @_Z5sync1v() +declare dso_local void @_Z7escape1Pi(i32*) From cd20c26622287f29f96bf8012d5aa0bd9774c7bc Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 3 Oct 2020 22:46:50 +0300 Subject: [PATCH 478/544] [NFC][InstCombine] Autogenerate a few tests being affected by an upcoming patch --- llvm/test/Transforms/InstCombine/atomic.ll | 267 ++++++++++++------ .../InstCombine/loadstore-metadata.ll | 80 +++++- .../InstCombine/non-integral-pointers.ll | 64 +++-- 3 files changed, 290 insertions(+), 121 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll index 869b12526de93..382d23d153098 100644 --- a/llvm/test/Transforms/InstCombine/atomic.ll +++ b/llvm/test/Transforms/InstCombine/atomic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -instcombine | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" @@ -6,9 +7,11 @@ target triple = "x86_64-apple-macosx10.7.0" ; Check transforms involving atomic operations define i32 @test1(i32* %p) { -; CHECK-LABEL: define i32 @test1( -; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4 -; CHECK: shl i32 %x, 1 +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: ret i32 [[Z]] +; %x = load atomic i32, i32* %p seq_cst, align 4 %y = load i32, i32* %p, align 4 %z = add i32 %x, %y @@ -16,9 +19,12 @@ define i32 @test1(i32* %p) { } define i32 @test2(i32* %p) { -; CHECK-LABEL: define i32 @test2( -; CHECK: %x = load volatile i32, i32* %p, align 4 -; CHECK: %y = load volatile i32, i32* %p, align 4 +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[Y:%.*]] = load volatile i32, i32* [[P]], align 4 +; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[Z]] +; %x = load volatile i32, i32* %p, align 4 %y = load volatile i32, i32* %p, align 4 %z = add i32 %x, %y @@ -29,8 +35,11 @@ define i32 @test2(i32* %p) { ; memory location are a bit unclear, but conservatively, we know we don't ; want to remove the volatile. define i32 @test3(i32* %p) { -; CHECK-LABEL: define i32 @test3( -; CHECK: %x = load volatile i32, i32* %p, align 4 +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: ret i32 [[Z]] +; %x = load volatile i32, i32* %p, align 4 %y = load i32, i32* %p, align 4 %z = add i32 %x, %y @@ -39,21 +48,26 @@ define i32 @test3(i32* %p) { ; Forwarding from a stronger ordered atomic is fine define i32 @test4(i32* %p) { -; CHECK-LABEL: define i32 @test4( -; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4 -; CHECK: shl i32 %x, 1 +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: ret i32 [[Z]] +; %x = load atomic i32, i32* %p seq_cst, align 4 %y = load atomic i32, i32* %p unordered, align 4 %z = add i32 %x, %y ret i32 %z } -; Forwarding from a non-atomic is not. (The earlier load -; could in priciple be promoted to atomic and then forwarded, +; Forwarding from a non-atomic is not. (The earlier load +; could in priciple be promoted to atomic and then forwarded, ; but we can't just drop the atomic from the load.) define i32 @test5(i32* %p) { -; CHECK-LABEL: define i32 @test5( -; CHECK: %x = load atomic i32, i32* %p unordered, align 4 +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: ret i32 [[Z]] +; %x = load atomic i32, i32* %p unordered, align 4 %y = load i32, i32* %p, align 4 %z = add i32 %x, %y @@ -62,9 +76,11 @@ define i32 @test5(i32* %p) { ; Forwarding atomic to atomic is fine define i32 @test6(i32* %p) { -; CHECK-LABEL: define i32 @test6( -; CHECK: %x = load atomic i32, i32* %p unordered, align 4 -; CHECK: shl i32 %x, 1 +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: ret i32 [[Z]] +; %x = load atomic i32, i32* %p unordered, align 4 %y = load atomic i32, i32* %p unordered, align 4 %z = add i32 %x, %y @@ -73,9 +89,12 @@ define i32 @test6(i32* %p) { ; FIXME: we currently don't do anything for monotonic define i32 @test7(i32* %p) { -; CHECK-LABEL: define i32 @test7( -; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4 -; CHECK: %y = load atomic i32, i32* %p monotonic, align 4 +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[Y:%.*]] = load atomic i32, i32* [[P]] monotonic, align 4 +; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[Z]] +; %x = load atomic i32, i32* %p seq_cst, align 4 %y = load atomic i32, i32* %p monotonic, align 4 %z = add i32 %x, %y @@ -84,9 +103,12 @@ define i32 @test7(i32* %p) { ; FIXME: We could forward in racy code define i32 @test8(i32* %p) { -; CHECK-LABEL: define i32 @test8( -; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4 -; CHECK: %y = load atomic i32, i32* %p acquire, align 4 +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[Y:%.*]] = load atomic i32, i32* [[P]] acquire, align 4 +; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[Z]] +; %x = load atomic i32, i32* %p seq_cst, align 4 %y = load atomic i32, i32* %p acquire, align 4 %z = add i32 %x, %y @@ -96,45 +118,57 @@ define i32 @test8(i32* %p) { ; An unordered access to null is still unreachable. There's no ; ordering imposed. define i32 @test9() { -; CHECK-LABEL: define i32 @test9( -; CHECK: store i32 undef, i32* null +; CHECK-LABEL: @test9( +; CHECK-NEXT: store i32 undef, i32* null, align 536870912 +; CHECK-NEXT: ret i32 undef +; %x = load atomic i32, i32* null unordered, align 4 ret i32 %x } define i32 @test9_no_null_opt() #0 { -; CHECK-LABEL: define i32 @test9_no_null_opt( -; CHECK: load atomic i32, i32* null unordered +; CHECK-LABEL: @test9_no_null_opt( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null unordered, align 536870912 +; CHECK-NEXT: ret i32 [[X]] +; %x = load atomic i32, i32* null unordered, align 4 ret i32 %x } ; FIXME: Could also fold define i32 @test10() { -; CHECK-LABEL: define i32 @test10( -; CHECK: load atomic i32, i32* null monotonic +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null monotonic, align 536870912 +; CHECK-NEXT: ret i32 [[X]] +; %x = load atomic i32, i32* null monotonic, align 4 ret i32 %x } define i32 @test10_no_null_opt() #0 { -; CHECK-LABEL: define i32 @test10_no_null_opt( -; CHECK: load atomic i32, i32* null monotonic +; CHECK-LABEL: @test10_no_null_opt( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null monotonic, align 536870912 +; CHECK-NEXT: ret i32 [[X]] +; %x = load atomic i32, i32* null monotonic, align 4 ret i32 %x } ; Would this be legal to fold? Probably? define i32 @test11() { -; CHECK-LABEL: define i32 @test11( -; CHECK: load atomic i32, i32* null seq_cst +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null seq_cst, align 536870912 +; CHECK-NEXT: ret i32 [[X]] +; %x = load atomic i32, i32* null seq_cst, align 4 ret i32 %x } define i32 @test11_no_null_opt() #0 { -; CHECK-LABEL: define i32 @test11_no_null_opt( -; CHECK: load atomic i32, i32* null seq_cst +; CHECK-LABEL: @test11_no_null_opt( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null seq_cst, align 536870912 +; CHECK-NEXT: ret i32 [[X]] +; %x = load atomic i32, i32* null seq_cst, align 4 ret i32 %x } @@ -142,45 +176,57 @@ define i32 @test11_no_null_opt() #0 { ; An unordered access to null is still unreachable. There's no ; ordering imposed. define i32 @test12() { -; CHECK-LABEL: define i32 @test12( -; CHECK: store atomic i32 undef, i32* null +; CHECK-LABEL: @test12( +; CHECK-NEXT: store atomic i32 undef, i32* null unordered, align 536870912 +; CHECK-NEXT: ret i32 0 +; store atomic i32 0, i32* null unordered, align 4 ret i32 0 } define i32 @test12_no_null_opt() #0 { -; CHECK-LABEL: define i32 @test12_no_null_opt( -; CHECK: store atomic i32 0, i32* null unordered +; CHECK-LABEL: @test12_no_null_opt( +; CHECK-NEXT: store atomic i32 0, i32* null unordered, align 536870912 +; CHECK-NEXT: ret i32 0 +; store atomic i32 0, i32* null unordered, align 4 ret i32 0 } ; FIXME: Could also fold define i32 @test13() { -; CHECK-LABEL: define i32 @test13( -; CHECK: store atomic i32 0, i32* null monotonic +; CHECK-LABEL: @test13( +; CHECK-NEXT: store atomic i32 0, i32* null monotonic, align 536870912 +; CHECK-NEXT: ret i32 0 +; store atomic i32 0, i32* null monotonic, align 4 ret i32 0 } define i32 @test13_no_null_opt() #0 { -; CHECK-LABEL: define i32 @test13_no_null_opt( -; CHECK: store atomic i32 0, i32* null monotonic +; CHECK-LABEL: @test13_no_null_opt( +; CHECK-NEXT: store atomic i32 0, i32* null monotonic, align 536870912 +; CHECK-NEXT: ret i32 0 +; store atomic i32 0, i32* null monotonic, align 4 ret i32 0 } ; Would this be legal to fold? Probably? define i32 @test14() { -; CHECK-LABEL: define i32 @test14( -; CHECK: store atomic i32 0, i32* null seq_cst +; CHECK-LABEL: @test14( +; CHECK-NEXT: store atomic i32 0, i32* null seq_cst, align 536870912 +; CHECK-NEXT: ret i32 0 +; store atomic i32 0, i32* null seq_cst, align 4 ret i32 0 } define i32 @test14_no_null_opt() #0 { -; CHECK-LABEL: define i32 @test14_no_null_opt( -; CHECK: store atomic i32 0, i32* null seq_cst +; CHECK-LABEL: @test14_no_null_opt( +; CHECK-NEXT: store atomic i32 0, i32* null seq_cst, align 536870912 +; CHECK-NEXT: ret i32 0 +; store atomic i32 0, i32* null seq_cst, align 4 ret i32 0 } @@ -189,9 +235,12 @@ define i32 @test14_no_null_opt() #0 { @b = external global i32 define i32 @test15(i1 %cnd) { -; CHECK-LABEL: define i32 @test15( -; CHECK: load atomic i32, i32* @a unordered, align 4 -; CHECK: load atomic i32, i32* @b unordered, align 4 +; CHECK-LABEL: @test15( +; CHECK-NEXT: [[A_VAL:%.*]] = load atomic i32, i32* @a unordered, align 4 +; CHECK-NEXT: [[B_VAL:%.*]] = load atomic i32, i32* @b unordered, align 4 +; CHECK-NEXT: [[X:%.*]] = select i1 [[CND:%.*]], i32 [[A_VAL]], i32 [[B_VAL]] +; CHECK-NEXT: ret i32 [[X]] +; %addr = select i1 %cnd, i32* @a, i32* @b %x = load atomic i32, i32* %addr unordered, align 4 ret i32 %x @@ -199,8 +248,11 @@ define i32 @test15(i1 %cnd) { ; FIXME: This would be legal to transform define i32 @test16(i1 %cnd) { -; CHECK-LABEL: define i32 @test16( -; CHECK: load atomic i32, i32* %addr monotonic, align 4 +; CHECK-LABEL: @test16( +; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[ADDR]] monotonic, align 4 +; CHECK-NEXT: ret i32 [[X]] +; %addr = select i1 %cnd, i32* @a, i32* @b %x = load atomic i32, i32* %addr monotonic, align 4 ret i32 %x @@ -208,17 +260,28 @@ define i32 @test16(i1 %cnd) { ; FIXME: This would be legal to transform define i32 @test17(i1 %cnd) { -; CHECK-LABEL: define i32 @test17( -; CHECK: load atomic i32, i32* %addr seq_cst, align 4 +; CHECK-LABEL: @test17( +; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b +; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[ADDR]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[X]] +; %addr = select i1 %cnd, i32* @a, i32* @b %x = load atomic i32, i32* %addr seq_cst, align 4 ret i32 %x } define i32 @test22(i1 %cnd) { -; CHECK-LABEL: define i32 @test22( -; CHECK: [[PHI:%.*]] = phi i32 -; CHECK: store atomic i32 [[PHI]], i32* @a unordered, align 4 +; CHECK-LABEL: @test22( +; CHECK-NEXT: br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]] +; CHECK: block1: +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: block2: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 2, [[BLOCK2]] ], [ 1, [[BLOCK1]] ] +; CHECK-NEXT: store atomic i32 [[STOREMERGE]], i32* @a unordered, align 4 +; CHECK-NEXT: ret i32 0 +; br i1 %cnd, label %block1, label %block2 block1: @@ -234,8 +297,17 @@ merge: ; TODO: probably also legal here define i32 @test23(i1 %cnd) { -; CHECK-LABEL: define i32 @test23( -; CHECK: br i1 %cnd, label %block1, label %block2 +; CHECK-LABEL: @test23( +; CHECK-NEXT: br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]] +; CHECK: block1: +; CHECK-NEXT: store atomic i32 1, i32* @a monotonic, align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: block2: +; CHECK-NEXT: store atomic i32 2, i32* @a monotonic, align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: ret i32 0 +; br i1 %cnd, label %block1, label %block2 block1: @@ -252,9 +324,14 @@ merge: declare void @clobber() define i32 @test18(float* %p) { -; CHECK-LABEL: define i32 @test18( -; CHECK: load atomic i32, i32* [[A:%.*]] unordered, align 4 -; CHECK: store atomic i32 [[B:%.*]], i32* [[C:%.*]] unordered, align 4 +; CHECK-LABEL: @test18( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to i32* +; CHECK-NEXT: [[X1:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4 +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[P]] to i32* +; CHECK-NEXT: store atomic i32 [[X1]], i32* [[TMP2]] unordered, align 4 +; CHECK-NEXT: ret i32 0 +; %x = load atomic float, float* %p unordered, align 4 call void @clobber() ;; keep the load around store atomic float %x, float* %p unordered, align 4 @@ -263,9 +340,12 @@ define i32 @test18(float* %p) { ; TODO: probably also legal in this case define i32 @test19(float* %p) { -; CHECK-LABEL: define i32 @test19( -; CHECK: load atomic float, float* %p seq_cst, align 4 -; CHECK: store atomic float %x, float* %p seq_cst, align 4 +; CHECK-LABEL: @test19( +; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: store atomic float [[X]], float* [[P]] seq_cst, align 4 +; CHECK-NEXT: ret i32 0 +; %x = load atomic float, float* %p seq_cst, align 4 call void @clobber() ;; keep the load around store atomic float %x, float* %p seq_cst, align 4 @@ -273,57 +353,74 @@ define i32 @test19(float* %p) { } define i32 @test20(i32** %p, i8* %v) { -; CHECK-LABEL: define i32 @test20( -; CHECK: store atomic i8* %v, i8** [[D:%.*]] unordered, align 4 +; CHECK-LABEL: @test20( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** [[P:%.*]] to i8** +; CHECK-NEXT: store atomic i8* [[V:%.*]], i8** [[TMP1]] unordered, align 4 +; CHECK-NEXT: ret i32 0 +; %cast = bitcast i8* %v to i32* store atomic i32* %cast, i32** %p unordered, align 4 ret i32 0 } define i32 @test21(i32** %p, i8* %v) { -; CHECK-LABEL: define i32 @test21( -; CHECK: store atomic i32* %cast, i32** %p monotonic, align 4 +; CHECK-LABEL: @test21( +; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[V:%.*]] to i32* +; CHECK-NEXT: store atomic i32* [[CAST]], i32** [[P:%.*]] monotonic, align 4 +; CHECK-NEXT: ret i32 0 +; %cast = bitcast i8* %v to i32* store atomic i32* %cast, i32** %p monotonic, align 4 ret i32 0 } define void @pr27490a(i8** %p1, i8** %p2) { -; CHECK-LABEL: define void @pr27490 -; CHECK: %1 = bitcast i8** %p1 to i64* -; CHECK: %l1 = load i64, i64* %1, align 8 -; CHECK: %2 = bitcast i8** %p2 to i64* -; CHECK: store volatile i64 %l1, i64* %2, align 8 +; CHECK-LABEL: @pr27490a( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64* +; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64* +; CHECK-NEXT: store volatile i64 [[L1]], i64* [[TMP2]], align 8 +; CHECK-NEXT: ret void +; %l = load i8*, i8** %p1 store volatile i8* %l, i8** %p2 ret void } define void @pr27490b(i8** %p1, i8** %p2) { -; CHECK-LABEL: define void @pr27490 -; CHECK: %1 = bitcast i8** %p1 to i64* -; CHECK: %l1 = load i64, i64* %1, align 8 -; CHECK: %2 = bitcast i8** %p2 to i64* -; CHECK: store atomic i64 %l1, i64* %2 seq_cst, align 8 +; CHECK-LABEL: @pr27490b( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64* +; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64* +; CHECK-NEXT: store atomic i64 [[L1]], i64* [[TMP2]] seq_cst, align 8 +; CHECK-NEXT: ret void +; %l = load i8*, i8** %p1 store atomic i8* %l, i8** %p2 seq_cst, align 8 ret void } -;; At the moment, we can't form atomic vectors by folding since these are +;; At the moment, we can't form atomic vectors by folding since these are ;; not representable in the IR. This was pr29121. The right long term ;; solution is to extend the IR to handle this case. define <2 x float> @no_atomic_vector_load(i64* %p) { -; CHECK-LABEL: @no_atomic_vector_load -; CHECK: load atomic i64, i64* %p unordered, align 8 +; CHECK-LABEL: @no_atomic_vector_load( +; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, i64* [[P:%.*]] unordered, align 8 +; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast i64 [[LOAD]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[DOTCAST]] +; %load = load atomic i64, i64* %p unordered, align 8 %.cast = bitcast i64 %load to <2 x float> ret <2 x float> %.cast } define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) { -; CHECK-LABEL: @no_atomic_vector_store -; CHECK: store atomic i64 %1, i64* %2 unordered, align 8 +; CHECK-LABEL: @no_atomic_vector_store( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[P2:%.*]] to i64* +; CHECK-NEXT: store atomic i64 [[TMP1]], i64* [[TMP2]] unordered, align 8 +; CHECK-NEXT: ret void +; %1 = bitcast <2 x float> %p to i64 %2 = bitcast i8* %p2 to i64* store atomic i64 %1, i64* %2 unordered, align 8 diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll index 5916a8d3a4c33..e443c6ed00759 100644 --- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll +++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instcombine -S < %s | FileCheck %s target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -5,7 +6,11 @@ target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" define i32 @test_load_cast_combine_tbaa(float* %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA. ; CHECK-LABEL: @test_load_cast_combine_tbaa( -; CHECK: load i32, i32* %{{.*}}, !tbaa !0 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, [[TBAA0:!tbaa !.*]] +; CHECK-NEXT: ret i32 [[L1]] +; entry: %l = load float, float* %ptr, !tbaa !0 %c = bitcast float %l to i32 @@ -15,7 +20,11 @@ entry: define i32 @test_load_cast_combine_noalias(float* %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata. ; CHECK-LABEL: @test_load_cast_combine_noalias( -; CHECK: load i32, i32* %{{.*}}, !alias.scope !3, !noalias !4 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, !alias.scope !3, !noalias !4 +; CHECK-NEXT: ret i32 [[L1]] +; entry: %l = load float, float* %ptr, !alias.scope !3, !noalias !4 %c = bitcast float %l to i32 @@ -27,9 +36,11 @@ define float @test_load_cast_combine_range(i32* %ptr) { ; would be nice to preserve or update it somehow but this is hard when moving ; between types. ; CHECK-LABEL: @test_load_cast_combine_range( -; CHECK: load float, float* %{{.*}} -; CHECK-NOT: !range -; CHECK: ret float +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to float* +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[TMP0]], align 4 +; CHECK-NEXT: ret float [[L1]] +; entry: %l = load i32, i32* %ptr, !range !5 %c = bitcast i32 %l to float @@ -39,7 +50,11 @@ entry: define i32 @test_load_cast_combine_invariant(float* %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata. ; CHECK-LABEL: @test_load_cast_combine_invariant( -; CHECK: load i32, i32* %{{.*}}, !invariant.load !7 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, !invariant.load !7 +; CHECK-NEXT: ret i32 [[L1]] +; entry: %l = load float, float* %ptr, !invariant.load !6 %c = bitcast float %l to i32 @@ -50,7 +65,11 @@ define i32 @test_load_cast_combine_nontemporal(float* %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal ; metadata. ; CHECK-LABEL: @test_load_cast_combine_nontemporal( -; CHECK: load i32, i32* %{{.*}}, !nontemporal !8 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, !nontemporal !8 +; CHECK-NEXT: ret i32 [[L1]] +; entry: %l = load float, float* %ptr, !nontemporal !7 %c = bitcast float %l to i32 @@ -61,7 +80,11 @@ define i8* @test_load_cast_combine_align(i32** %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves align ; metadata. ; CHECK-LABEL: @test_load_cast_combine_align( -; CHECK: load i8*, i8** %{{.*}}, !align !9 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** [[PTR:%.*]] to i8** +; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[TMP0]], align 8, !align !9 +; CHECK-NEXT: ret i8* [[L1]] +; entry: %l = load i32*, i32** %ptr, !align !8 %c = bitcast i32* %l to i8* @@ -72,7 +95,11 @@ define i8* @test_load_cast_combine_deref(i32** %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable ; metadata. ; CHECK-LABEL: @test_load_cast_combine_deref( -; CHECK: load i8*, i8** %{{.*}}, !dereferenceable !9 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** [[PTR:%.*]] to i8** +; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[TMP0]], align 8, !dereferenceable !9 +; CHECK-NEXT: ret i8* [[L1]] +; entry: %l = load i32*, i32** %ptr, !dereferenceable !8 %c = bitcast i32* %l to i8* @@ -83,7 +110,11 @@ define i8* @test_load_cast_combine_deref_or_null(i32** %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves ; dereferenceable_or_null metadata. ; CHECK-LABEL: @test_load_cast_combine_deref_or_null( -; CHECK: load i8*, i8** %{{.*}}, !dereferenceable_or_null !9 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** [[PTR:%.*]] to i8** +; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[TMP0]], align 8, !dereferenceable_or_null !9 +; CHECK-NEXT: ret i8* [[L1]] +; entry: %l = load i32*, i32** %ptr, !dereferenceable_or_null !8 %c = bitcast i32* %l to i8* @@ -94,7 +125,23 @@ define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access ; metadata. ; CHECK-LABEL: @test_load_cast_combine_loop( -; CHECK: load i32, i32* %{{.*}}, !llvm.access.group !6 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[SRC_GEP]] to i32* +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !6 +; CHECK-NEXT: store i32 [[L1]], i32* [[DST_GEP]], align 4 +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]], [[LOOP1:!llvm.loop !.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -126,9 +173,14 @@ define void @test_load_cast_combine_nonnull(float** %ptr) { ; file, and no LABEL lines are to be added after this point. ; ; CHECK-LABEL: @test_load_cast_combine_nonnull( -; CHECK: %[[V:.*]] = load i64, i64* %{{.*}}, !range ![[MD:[0-9]+]] -; CHECK-NOT: !nonnull -; CHECK: store i64 %[[V]], i64* +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float** [[PTR:%.*]] to i64* +; CHECK-NEXT: [[P1:%.*]] = load i64, i64* [[TMP0]], align 8, !range ![[MD:[0-9]+]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float*, float** [[PTR]], i64 42 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float** [[GEP]] to i64* +; CHECK-NEXT: store i64 [[P1]], i64* [[TMP1]], align 8 +; CHECK-NEXT: ret void +; entry: %p = load float*, float** %ptr, !nonnull !6 %gep = getelementptr float*, float** %ptr, i32 42 diff --git a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll index 3b4538985bd5e..e8f0013604a9c 100644 --- a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll +++ b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instcombine -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" @@ -5,16 +6,17 @@ target triple = "x86_64-unknown-linux-gnu" define i8 addrspace(4)* @f_0() { ; CHECK-LABEL: @f_0( -; CHECK: ret i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* null, i64 50) +; CHECK-NEXT: ret i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* null, i64 50) +; %result = getelementptr i8, i8 addrspace(4)* null, i64 50 ret i8 addrspace(4)* %result } define i8 addrspace(3)* @f_1() { ; inttoptr is fine here since addrspace(3) is integral. - ; CHECK-LABEL: @f_1( -; CHECK: ret i8 addrspace(3)* inttoptr (i64 50 to i8 addrspace(3)*) +; CHECK-NEXT: ret i8 addrspace(3)* inttoptr (i64 50 to i8 addrspace(3)*) +; %result = getelementptr i8, i8 addrspace(3)* null, i64 50 ret i8 addrspace(3)* %result } @@ -22,13 +24,13 @@ define i8 addrspace(3)* @f_1() { define void @f_2(i8 addrspace(4)** %ptr0, i8 addrspace(4)** %ptr1) { ; It is not okay to convert the load/store pair to load and store ; integers, since pointers in address space 4 are non-integral. - ; CHECK-LABEL: @f_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)** [[PTR0:%.*]], align 8 +; CHECK-NEXT: store i8 addrspace(4)* [[VAL]], i8 addrspace(4)** [[PTR1:%.*]], align 8 +; CHECK-NEXT: ret void +; entry: -; CHECK: %val = load i8 addrspace(4)*, i8 addrspace(4)** %ptr0, align 8 -; CHECK: store i8 addrspace(4)* %val, i8 addrspace(4)** %ptr1, align 8 -; CHECK-NOT: load i64 -; CHECK-NOT: store i64 %val = load i8 addrspace(4)*, i8 addrspace(4)** %ptr0 store i8 addrspace(4)* %val, i8 addrspace(4)** %ptr1 ret void @@ -37,44 +39,60 @@ entry: define void @f_3(i8 addrspace(3)** %ptr0, i8 addrspace(3)** %ptr1) { ; It *is* okay to convert the load/store pair to load and store ; integers, since pointers in address space 3 are integral. - ; CHECK-LABEL: @f_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 addrspace(3)** [[PTR0:%.*]] to i64* +; CHECK-NEXT: [[VAL1:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)** [[PTR1:%.*]] to i64* +; CHECK-NEXT: store i64 [[VAL1]], i64* [[TMP1]], align 8 +; CHECK-NEXT: ret void +; entry: -; CHECK: load i64 -; CHECK: store i64 %val = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0 store i8 addrspace(3)* %val, i8 addrspace(3)** %ptr1 ret void } define i64 @g(i8 addrspace(4)** %gp) { - ; CHECK-LABEL: @g( - ; CHECK: load +; CHECK-LABEL: @g( +; CHECK-NEXT: [[DOTPRE:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)** [[GP:%.*]], align 8 +; CHECK-NEXT: [[V74:%.*]] = call i8 addrspace(4)* @alloc() +; CHECK-NEXT: [[V77:%.*]] = getelementptr i8, i8 addrspace(4)* [[V74]], i64 -8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i8 addrspace(4)* addrspace(4)* +; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast i8 addrspace(4)* addrspace(4)* [[TMP1]] to i8 addrspace(4)** +; CHECK-NEXT: store i8 addrspace(4)* [[DOTPRE]], i8 addrspace(4)** [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i64 addrspace(4)* +; CHECK-NEXT: [[V80:%.*]] = addrspacecast i64 addrspace(4)* [[TMP3]] to i64* +; CHECK-NEXT: [[V81:%.*]] = load i64, i64* [[V80]], align 8 +; CHECK-NEXT: ret i64 [[V81]] +; %.pre = load i8 addrspace(4)*, i8 addrspace(4)** %gp, align 8 %v74 = call i8 addrspace(4)* @alloc() %v75 = addrspacecast i8 addrspace(4)* %v74 to i8* %v76 = bitcast i8* %v75 to i8 addrspace(4)** %v77 = getelementptr i8 addrspace(4)*, i8 addrspace(4)** %v76, i64 -1 - ; CHECK: store store i8 addrspace(4)* %.pre, i8 addrspace(4)** %v77, align 8 %v80 = bitcast i8 addrspace(4)** %v77 to i64* - ; CHECK: load - ; CHECK-NOT: ptrtoint %v81 = load i64, i64* %v80, align 8 ret i64 %v81 } define i64 @g2(i8* addrspace(4)* %gp) { - ; CHECK-LABEL: @g2( - ; CHECK: load +; CHECK-LABEL: @g2( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* addrspace(4)* [[GP:%.*]] to i64 addrspace(4)* +; CHECK-NEXT: [[DOTPRE1:%.*]] = load i64, i64 addrspace(4)* [[TMP1]], align 8 +; CHECK-NEXT: [[V74:%.*]] = call i8 addrspace(4)* @alloc() +; CHECK-NEXT: [[V77:%.*]] = getelementptr i8, i8 addrspace(4)* [[V74]], i64 -8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i64 addrspace(4)* +; CHECK-NEXT: store i64 [[DOTPRE1]], i64 addrspace(4)* [[TMP2]], align 8 +; CHECK-NEXT: ret i64 [[DOTPRE1]] +; %.pre = load i8*, i8* addrspace(4)* %gp, align 8 %v74 = call i8 addrspace(4)* @alloc() %v76 = bitcast i8 addrspace(4)* %v74 to i8* addrspace(4)* %v77 = getelementptr i8*, i8* addrspace(4)* %v76, i64 -1 - ; CHECK: store store i8* %.pre, i8* addrspace(4)* %v77, align 8 %v80 = bitcast i8* addrspace(4)* %v77 to i64 addrspace(4)* - ; CHECK-NOT: store %v81 = load i64, i64 addrspace(4)* %v80, align 8 ret i64 %v81 } @@ -82,8 +100,10 @@ define i64 @g2(i8* addrspace(4)* %gp) { declare i8 addrspace(4)* @alloc() define i64 @f_4(i8 addrspace(4)* %v0) { - ; CHECK-LABEL: @f_4( - ; CHECK-NOT: ptrtoint +; CHECK-LABEL: @f_4( +; CHECK-NEXT: [[V6:%.*]] = call i64 bitcast (i64 (i64)* @f_5 to i64 (i8 addrspace(4)*)*)(i8 addrspace(4)* [[V0:%.*]]) +; CHECK-NEXT: ret i64 [[V6]] +; %v5 = bitcast i64 (i64)* @f_5 to i64 (i8 addrspace(4)*)* %v6 = call i64 %v5(i8 addrspace(4)* %v0) ret i64 %v6 From 82dcd383c422f03c2b399af5b94701365cdf1afa Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 3 Oct 2020 16:31:49 +0100 Subject: [PATCH 479/544] [VPlan] Properly update users when updating operands. When updating operands of a VPUser, we also have to adjust the list of users for the new and old VPValues. This is required once we start transitioning recipes to become VPValues. --- llvm/lib/Transforms/Vectorize/VPlanValue.h | 22 ++++- .../Transforms/Vectorize/VPlanTest.cpp | 86 +++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 50cf1285dd4b3..0882837170f20 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -97,6 +97,22 @@ class VPValue { unsigned getNumUsers() const { return Users.size(); } void addUser(VPUser &User) { Users.push_back(&User); } + /// Remove a single \p User from the list of users. + void removeUser(VPUser &User) { + bool Found = false; + // The same user can be added multiple times, e.g. because the same VPValue + // is used twice by the same VPUser. Remove a single one. + erase_if(Users, [&User, &Found](VPUser *Other) { + if (Found) + return false; + if (Other == &User) { + Found = true; + return true; + } + return false; + }); + } + typedef SmallVectorImpl::iterator user_iterator; typedef SmallVectorImpl::const_iterator const_user_iterator; typedef iterator_range user_range; @@ -164,7 +180,11 @@ class VPUser { return Operands[N]; } - void setOperand(unsigned I, VPValue *New) { Operands[I] = New; } + void setOperand(unsigned I, VPValue *New) { + Operands[I]->removeUser(*this); + Operands[I] = New; + New->addUser(*this); + } typedef SmallVectorImpl::iterator operand_iterator; typedef SmallVectorImpl::const_iterator const_operand_iterator; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 46d9899cd054c..a64f9e374ebff 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -87,6 +87,92 @@ TEST(VPInstructionTest, moveAfter) { EXPECT_EQ(I3->getParent(), I4->getParent()); } +TEST(VPInstructionTest, setOperand) { + VPValue *VPV1 = new VPValue(); + VPValue *VPV2 = new VPValue(); + VPInstruction *I1 = new VPInstruction(0, {VPV1, VPV2}); + EXPECT_EQ(1u, VPV1->getNumUsers()); + EXPECT_EQ(I1, *VPV1->user_begin()); + EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_EQ(I1, *VPV2->user_begin()); + + // Replace operand 0 (VPV1) with VPV3. + VPValue *VPV3 = new VPValue(); + I1->setOperand(0, VPV3); + EXPECT_EQ(0u, VPV1->getNumUsers()); + EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_EQ(I1, *VPV2->user_begin()); + EXPECT_EQ(1u, VPV3->getNumUsers()); + EXPECT_EQ(I1, *VPV3->user_begin()); + + // Replace operand 1 (VPV2) with VPV3. + I1->setOperand(1, VPV3); + EXPECT_EQ(0u, VPV1->getNumUsers()); + EXPECT_EQ(0u, VPV2->getNumUsers()); + EXPECT_EQ(2u, VPV3->getNumUsers()); + EXPECT_EQ(I1, *VPV3->user_begin()); + EXPECT_EQ(I1, *std::next(VPV3->user_begin())); + + // Replace operand 0 (VPV3) with VPV4. + VPValue *VPV4 = new VPValue(); + I1->setOperand(0, VPV4); + EXPECT_EQ(1u, VPV3->getNumUsers()); + EXPECT_EQ(I1, *VPV3->user_begin()); + EXPECT_EQ(I1, *VPV4->user_begin()); + + // Replace operand 1 (VPV3) with VPV4. + I1->setOperand(1, VPV4); + EXPECT_EQ(0u, VPV3->getNumUsers()); + EXPECT_EQ(I1, *VPV4->user_begin()); + EXPECT_EQ(I1, *std::next(VPV4->user_begin())); + + delete I1; + delete VPV1; + delete VPV2; + delete VPV3; + delete VPV4; +} + +TEST(VPInstructionTest, replaceAllUsesWith) { + VPValue *VPV1 = new VPValue(); + VPValue *VPV2 = new VPValue(); + VPInstruction *I1 = new VPInstruction(0, {VPV1, VPV2}); + + // Replace all uses of VPV1 with VPV3. + VPValue *VPV3 = new VPValue(); + VPV1->replaceAllUsesWith(VPV3); + EXPECT_EQ(VPV3, I1->getOperand(0)); + EXPECT_EQ(VPV2, I1->getOperand(1)); + EXPECT_EQ(0u, VPV1->getNumUsers()); + EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_EQ(I1, *VPV2->user_begin()); + EXPECT_EQ(1u, VPV3->getNumUsers()); + EXPECT_EQ(I1, *VPV3->user_begin()); + + // Replace all uses of VPV2 with VPV3. + VPV2->replaceAllUsesWith(VPV3); + EXPECT_EQ(VPV3, I1->getOperand(0)); + EXPECT_EQ(VPV3, I1->getOperand(1)); + EXPECT_EQ(0u, VPV1->getNumUsers()); + EXPECT_EQ(0u, VPV2->getNumUsers()); + EXPECT_EQ(2u, VPV3->getNumUsers()); + EXPECT_EQ(I1, *VPV3->user_begin()); + + // Replace all uses of VPV3 with VPV1. + VPV3->replaceAllUsesWith(VPV1); + EXPECT_EQ(VPV1, I1->getOperand(0)); + EXPECT_EQ(VPV1, I1->getOperand(1)); + EXPECT_EQ(2u, VPV1->getNumUsers()); + EXPECT_EQ(I1, *VPV1->user_begin()); + EXPECT_EQ(0u, VPV2->getNumUsers()); + EXPECT_EQ(0u, VPV3->getNumUsers()); + + delete I1; + delete VPV1; + delete VPV2; + delete VPV3; +} + TEST(VPBasicBlockTest, getPlan) { { VPBasicBlock *VPBB1 = new VPBasicBlock(); From 508ac0ec13c1c56029fd2390a2e14c1b2ea84b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 3 Oct 2020 22:16:29 +0200 Subject: [PATCH 480/544] [lldb] [test/Register] Mark new FP reg tests XFAIL on Windows --- lldb/test/Shell/Register/x86-64-fp-write.test | 1 + lldb/test/Shell/Register/x86-fp-read.test | 1 + lldb/test/Shell/Register/x86-fp-write.test | 1 + 3 files changed, 3 insertions(+) diff --git a/lldb/test/Shell/Register/x86-64-fp-write.test b/lldb/test/Shell/Register/x86-64-fp-write.test index b2e8c271b51bb..38c8942091185 100644 --- a/lldb/test/Shell/Register/x86-64-fp-write.test +++ b/lldb/test/Shell/Register/x86-64-fp-write.test @@ -1,3 +1,4 @@ +# XFAIL: system-windows # REQUIRES: native && target-x86_64 # RUN: %clangxx_host %p/Inputs/x86-fp-write.cpp -o %t # RUN: %lldb -b -s %s %t | FileCheck %s diff --git a/lldb/test/Shell/Register/x86-fp-read.test b/lldb/test/Shell/Register/x86-fp-read.test index 9ecc5634e7293..42c85baa22953 100644 --- a/lldb/test/Shell/Register/x86-fp-read.test +++ b/lldb/test/Shell/Register/x86-fp-read.test @@ -1,3 +1,4 @@ +# XFAIL: system-windows # REQUIRES: native && (target-x86 || target-x86_64) # RUN: %clangxx_host -g %p/Inputs/x86-fp-read.cpp -o %t # RUN: %lldb -b -s %s %t | FileCheck %s diff --git a/lldb/test/Shell/Register/x86-fp-write.test b/lldb/test/Shell/Register/x86-fp-write.test index 81f542c419afe..a88bbfd8ce009 100644 --- a/lldb/test/Shell/Register/x86-fp-write.test +++ b/lldb/test/Shell/Register/x86-fp-write.test @@ -1,3 +1,4 @@ +# XFAIL: system-windows # REQUIRES: native && target-x86 # RUN: %clangxx_host %p/Inputs/x86-fp-write.cpp -o %t # RUN: %lldb -b -s %s %t | FileCheck %s From 0a3523299dec61f2e6eb2a28fdecd25360e8b6d8 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 1 Oct 2020 14:35:11 -0700 Subject: [PATCH 481/544] [MC] Assert that MCRegUnitIterator operates over MCRegisters The signature of the ctor expects a MCRegister, but currently any unsigned value can be converted to a MCRegister. This patch checks that indeed the provided value is a physical register only. We want to eventually stop implicitly converting unsigned or Register to MCRegister (which is incorrect). The next step after this patch is changing uses of MCRegUnitIterator to explicitly cast Register or unsigned values to MCRegister. To that end, this patch also introduces 2 APIs that make that conversion checked and explicit. Differential Revision: https://reviews.llvm.org/D88705 --- llvm/include/llvm/CodeGen/Register.h | 9 +++++++++ llvm/include/llvm/MC/MCRegister.h | 6 ++++++ llvm/include/llvm/MC/MCRegisterInfo.h | 1 + 3 files changed, 16 insertions(+) diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index 884c8bc7dc2ec..86dde8ed2903c 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -110,6 +110,15 @@ class Register { return MCRegister(Reg); } + /// Utility to check-convert this value to a MCRegister. The caller is + /// expected to have already validated that this Register is, indeed, + /// physical. + MCRegister asMCReg() const { + assert(Reg == MCRegister::NoRegister || + MCRegister::isPhysicalRegister(Reg)); + return MCRegister(Reg); + } + bool isValid() const { return Reg != MCRegister::NoRegister; } /// Comparisons between register objects diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h index 5f2e31b70fd8d..21ffe28ef6a78 100644 --- a/llvm/include/llvm/MC/MCRegister.h +++ b/llvm/include/llvm/MC/MCRegister.h @@ -68,6 +68,12 @@ class MCRegister { return Reg; } + /// Check the provided unsigned value is a valid MCRegister. + static MCRegister from(unsigned Val) { + assert(Val == NoRegister || isPhysicalRegister(Val)); + return MCRegister(Val); + } + unsigned id() const { return Reg; } diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index 9864d95d19e00..0c1ac6254ec12 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -675,6 +675,7 @@ class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator { MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) { assert(Reg && "Null register has no regunits"); + assert(MCRegister::isPhysicalRegister(Reg.id())); // Decode the RegUnits MCRegisterDesc field. unsigned RU = MCRI->get(Reg).RegUnits; unsigned Scale = RU & 15; From d20c602aad7cc7d116df3bf8c17c533ef361ee61 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Sat, 3 Oct 2020 14:18:38 -0700 Subject: [PATCH 482/544] [Object][MachO] Refactor MachOUniversalWriter This diff refactors writeUniversalBinary and adds writeUniversalBinaryToBuffer. This is a preparation for adding support for universal binaries to llvm-objcopy. Test plan: make check-all Differential revision: https://reviews.llvm.org/D88372 --- .../llvm/Object/MachOUniversalWriter.h | 3 + llvm/lib/Object/MachOUniversalWriter.cpp | 83 ++++++++++++------- 2 files changed, 55 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h index 5a94edb8821c2..49352440dca17 100644 --- a/llvm/include/llvm/Object/MachOUniversalWriter.h +++ b/llvm/include/llvm/Object/MachOUniversalWriter.h @@ -86,6 +86,9 @@ class Slice { Error writeUniversalBinary(ArrayRef Slices, StringRef OutputFileName); +Expected> +writeUniversalBinaryToBuffer(ArrayRef Slices); + } // end namespace object } // end namespace llvm diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp index 2d7bb436e5356..165964e077ce3 100644 --- a/llvm/lib/Object/MachOUniversalWriter.cpp +++ b/llvm/lib/Object/MachOUniversalWriter.cpp @@ -19,7 +19,7 @@ #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" -#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" using namespace llvm; using namespace object; @@ -258,8 +258,8 @@ buildFatArchList(ArrayRef Slices) { return FatArchList; } -Error object::writeUniversalBinary(ArrayRef Slices, - StringRef OutputFileName) { +static Error writeUniversalBinaryToStream(ArrayRef Slices, + raw_ostream &Out) { MachO::fat_header FatHeader; FatHeader.magic = MachO::FAT_MAGIC; FatHeader.nfat_arch = Slices.size(); @@ -270,42 +270,63 @@ Error object::writeUniversalBinary(ArrayRef Slices, return FatArchListOrErr.takeError(); SmallVector FatArchList = *FatArchListOrErr; - const bool IsExecutable = any_of(Slices, [](Slice S) { - return sys::fs::can_execute(S.getBinary()->getFileName()); - }); - const uint64_t OutputFileSize = - static_cast(FatArchList.back().offset) + - FatArchList.back().size; - Expected> OutFileOrError = - FileOutputBuffer::create(OutputFileName, OutputFileSize, - IsExecutable ? FileOutputBuffer::F_executable - : 0); - if (!OutFileOrError) - return createFileError(OutputFileName, OutFileOrError.takeError()); - std::unique_ptr OutFile = std::move(OutFileOrError.get()); - std::memset(OutFile->getBufferStart(), 0, OutputFileSize); - if (sys::IsLittleEndianHost) MachO::swapStruct(FatHeader); - std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header)); + Out.write(reinterpret_cast(&FatHeader), + sizeof(MachO::fat_header)); - for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { - MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef(); - std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(), - OutFile->getBufferStart() + FatArchList[Index].offset); - } + if (sys::IsLittleEndianHost) + for (MachO::fat_arch &FA : FatArchList) + MachO::swapStruct(FA); + Out.write(reinterpret_cast(FatArchList.data()), + sizeof(MachO::fat_arch) * FatArchList.size()); - // FatArchs written after Slices in order to reduce the number of swaps for - // the LittleEndian case if (sys::IsLittleEndianHost) for (MachO::fat_arch &FA : FatArchList) MachO::swapStruct(FA); - std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header), - FatArchList.begin(), - sizeof(MachO::fat_arch) * FatArchList.size()); - if (Error E = OutFile->commit()) - return createFileError(OutputFileName, std::move(E)); + size_t Offset = + sizeof(MachO::fat_header) + sizeof(MachO::fat_arch) * FatArchList.size(); + for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { + MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef(); + assert((Offset <= FatArchList[Index].offset) && "Incorrect slice offset"); + Out.write_zeros(FatArchList[Index].offset - Offset); + Out.write(BufferRef.getBufferStart(), BufferRef.getBufferSize()); + Offset = FatArchList[Index].offset + BufferRef.getBufferSize(); + } + Out.flush(); return Error::success(); } + +Error object::writeUniversalBinary(ArrayRef Slices, + StringRef OutputFileName) { + const bool IsExecutable = any_of(Slices, [](Slice S) { + return sys::fs::can_execute(S.getBinary()->getFileName()); + }); + unsigned Mode = sys::fs::all_read | sys::fs::all_write; + if (IsExecutable) + Mode |= sys::fs::all_exe; + Expected Temp = sys::fs::TempFile::create( + OutputFileName + ".temp-universal-%%%%%%", Mode); + if (!Temp) + return Temp.takeError(); + raw_fd_ostream Out(Temp->FD, false); + if (Error E = writeUniversalBinaryToStream(Slices, Out)) { + if (Error DiscardError = Temp->discard()) + return joinErrors(std::move(E), std::move(DiscardError)); + return E; + } + return Temp->keep(OutputFileName); +} + +Expected> +object::writeUniversalBinaryToBuffer(ArrayRef Slices) { + SmallVector Buffer; + raw_svector_ostream Out(Buffer); + + if (Error E = writeUniversalBinaryToStream(Slices, Out)) + return std::move(E); + + return std::make_unique(std::move(Buffer)); +} From 9b851527d53345c4a5d56a909dfa1ca7f59a0c11 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Sat, 3 Oct 2020 15:17:38 -0700 Subject: [PATCH 483/544] Add indented raw_ostream class Class simplifies keeping track of the indentation while emitting. For every new line the current indentation is simply prefixed (if not at start of line, then it just emits as normal). Add a simple Region helper that makes it easy to have the C++ scope match the emitted scope. Use this in op doc generator and rewrite generator. This reverts revert commit be185b6a7355fdfeb1c31df2e1272366fe58b01f addresses shared lib failure by fixing up cmake files. Differential Revision: https://reviews.llvm.org/D84107 --- mlir/include/mlir/Support/IndentedOstream.h | 102 +++++++ mlir/lib/Support/CMakeLists.txt | 10 + mlir/lib/Support/IndentedOstream.cpp | 65 +++++ mlir/tools/mlir-tblgen/CMakeLists.txt | 1 + mlir/tools/mlir-tblgen/OpDocGen.cpp | 40 +-- mlir/tools/mlir-tblgen/RewriterGen.cpp | 250 +++++++++--------- mlir/unittests/Support/CMakeLists.txt | 6 + .../unittests/Support/IndentedOstreamTest.cpp | 110 ++++++++ 8 files changed, 418 insertions(+), 166 deletions(-) create mode 100644 mlir/include/mlir/Support/IndentedOstream.h create mode 100644 mlir/lib/Support/IndentedOstream.cpp create mode 100644 mlir/unittests/Support/CMakeLists.txt create mode 100644 mlir/unittests/Support/IndentedOstreamTest.cpp diff --git a/mlir/include/mlir/Support/IndentedOstream.h b/mlir/include/mlir/Support/IndentedOstream.h new file mode 100644 index 0000000000000..20161c1f3898f --- /dev/null +++ b/mlir/include/mlir/Support/IndentedOstream.h @@ -0,0 +1,102 @@ +//===- IndentedOstream.h - raw ostream wrapper to indent --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// raw_ostream subclass that keeps track of indentation for textual output +// where indentation helps readability. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_SUPPORT_INDENTEDOSTREAM_H_ +#define MLIR_SUPPORT_INDENTEDOSTREAM_H_ + +#include "mlir/Support/LLVM.h" +#include "llvm/Support/raw_ostream.h" + +namespace mlir { + +/// raw_ostream subclass that simplifies indention a sequence of code. +class raw_indented_ostream : public raw_ostream { +public: + explicit raw_indented_ostream(llvm::raw_ostream &os) : os(os) { + SetUnbuffered(); + } + + /// Simple RAII struct to use to indentation around entering/exiting region. + struct DelimitedScope { + explicit DelimitedScope(raw_indented_ostream &os, StringRef open = "", + StringRef close = "") + : os(os), open(open), close(close) { + os << open; + os.indent(); + } + ~DelimitedScope() { + os.unindent(); + os << close; + } + + raw_indented_ostream &os; + + private: + llvm::StringRef open, close; + }; + + /// Returns DelimitedScope. + DelimitedScope scope(StringRef open = "", StringRef close = "") { + return DelimitedScope(*this, open, close); + } + + /// Re-indents by removing the leading whitespace from the first non-empty + /// line from every line of the the string, skipping over empty lines at the + /// start. + raw_indented_ostream &reindent(StringRef str); + + /// Increases the indent and returning this raw_indented_ostream. + raw_indented_ostream &indent() { + currentIndent += indentSize; + return *this; + } + + /// Decreases the indent and returning this raw_indented_ostream. + raw_indented_ostream &unindent() { + currentIndent = std::max(0, currentIndent - indentSize); + return *this; + } + + /// Emits whitespace and sets the indendation for the stream. + raw_indented_ostream &indent(int with) { + os.indent(with); + atStartOfLine = false; + currentIndent = with; + return *this; + } + +private: + void write_impl(const char *ptr, size_t size) override; + + /// Return the current position within the stream, not counting the bytes + /// currently in the buffer. + uint64_t current_pos() const override { return os.tell(); } + + /// Constant indent added/removed. + static constexpr int indentSize = 2; + + // Tracker for current indentation. + int currentIndent = 0; + + // The leading whitespace of the string being printed, if reindent is used. + int leadingWs = 0; + + // Tracks whether at start of line and so indent is required or not. + bool atStartOfLine = true; + + // The underlying raw_ostream. + raw_ostream &os; +}; + +} // namespace mlir +#endif // MLIR_SUPPORT_INDENTEDOSTREAM_H_ diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt index bdba990571721..df72bc97cef1f 100644 --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_OPTIONAL_SOURCES FileUtilities.cpp + IndentedOstream.cpp MlirOptMain.cpp StorageUniquer.cpp ToolUtilities.cpp @@ -27,3 +28,12 @@ add_mlir_library(MLIROptLib MLIRParser MLIRSupport ) + +# This doesn't use add_mlir_library as it is used in mlir-tblgen and else +# mlir-tblgen ends up depending on mlir-generic-headers. +add_llvm_library(MLIRSupportIndentedOstream + IndentedOstream.cpp + + LINK_LIBS PUBLIC + LLVMSupport + ) diff --git a/mlir/lib/Support/IndentedOstream.cpp b/mlir/lib/Support/IndentedOstream.cpp new file mode 100644 index 0000000000000..bb3feef6c4458 --- /dev/null +++ b/mlir/lib/Support/IndentedOstream.cpp @@ -0,0 +1,65 @@ +//===- IndentedOstream.cpp - raw ostream wrapper to indent ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// raw_ostream subclass that keeps track of indentation for textual output +// where indentation helps readability. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/IndentedOstream.h" + +using namespace mlir; + +raw_indented_ostream &mlir::raw_indented_ostream::reindent(StringRef str) { + StringRef remaining = str; + // Find leading whitespace indent. + while (!remaining.empty()) { + auto split = remaining.split('\n'); + size_t indent = split.first.find_first_not_of(" \t"); + if (indent != StringRef::npos) { + leadingWs = indent; + break; + } + remaining = split.second; + } + // Print, skipping the empty lines. + *this << remaining; + leadingWs = 0; + return *this; +} + +void mlir::raw_indented_ostream::write_impl(const char *ptr, size_t size) { + StringRef str(ptr, size); + // Print out indented. + auto print = [this](StringRef str) { + if (atStartOfLine) + os.indent(currentIndent) << str.substr(leadingWs); + else + os << str.substr(leadingWs); + }; + + while (!str.empty()) { + size_t idx = str.find('\n'); + if (idx == StringRef::npos) { + if (!str.substr(leadingWs).empty()) { + print(str); + atStartOfLine = false; + } + break; + } + + auto split = + std::make_pair(str.slice(0, idx), str.slice(idx + 1, StringRef::npos)); + // Print empty new line without spaces if line only has spaces. + if (!split.first.ltrim().empty()) + print(split.first); + os << '\n'; + atStartOfLine = true; + str = split.second; + } +} diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index 46b9d81115c9b..40447a89082f7 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -25,6 +25,7 @@ add_tablegen(mlir-tblgen MLIR set_target_properties(mlir-tblgen PROPERTIES FOLDER "Tablegenning") target_link_libraries(mlir-tblgen PRIVATE + MLIRSupportIndentedOstream MLIRTableGen) mlir_check_all_link_libraries(mlir-tblgen) diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index df78556c1c77b..ff6a290397630 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DocGenUtilities.h" +#include "mlir/Support/IndentedOstream.h" #include "mlir/TableGen/GenInfo.h" #include "mlir/TableGen/Operator.h" #include "llvm/ADT/DenseMap.h" @@ -35,39 +36,8 @@ using mlir::tblgen::Operator; // in a way the user wanted but has some additional indenting due to being // nested in the op definition. void mlir::tblgen::emitDescription(StringRef description, raw_ostream &os) { - // Determine the minimum number of spaces in a line. - size_t min_indent = -1; - StringRef remaining = description; - while (!remaining.empty()) { - auto split = remaining.split('\n'); - size_t indent = split.first.find_first_not_of(" \t"); - if (indent != StringRef::npos) - min_indent = std::min(indent, min_indent); - remaining = split.second; - } - - // Print out the description indented. - os << "\n"; - remaining = description; - bool printed = false; - while (!remaining.empty()) { - auto split = remaining.split('\n'); - if (split.second.empty()) { - // Skip last line with just spaces. - if (split.first.ltrim().empty()) - break; - } - // Print empty new line without spaces if line only has spaces, unless no - // text has been emitted before. - if (split.first.ltrim().empty()) { - if (printed) - os << "\n"; - } else { - os << split.first.substr(min_indent) << "\n"; - printed = true; - } - remaining = split.second; - } + raw_indented_ostream ros(os); + ros.reindent(description.rtrim(" \t")); } // Emits `str` with trailing newline if not empty. @@ -116,7 +86,7 @@ static void emitOpDoc(Operator op, raw_ostream &os) { // Emit the summary, syntax, and description if present. if (op.hasSummary()) - os << "\n" << op.getSummary() << "\n"; + os << "\n" << op.getSummary() << "\n\n"; if (op.hasAssemblyFormat()) emitAssemblyFormat(op.getOperationName(), op.getAssemblyFormat().trim(), os); @@ -228,7 +198,7 @@ static void emitDialectDoc(const RecordKeeper &recordKeeper, raw_ostream &os) { } os << "\n"; - for (auto dialectWithOps : dialectOps) + for (const auto &dialectWithOps : dialectOps) emitDialectDoc(dialectWithOps.first, dialectWithOps.second, dialectTypes[dialectWithOps.first], os); } diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index 9b2f35f566246..e16900227759d 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Support/IndentedOstream.h" #include "mlir/TableGen/Attribute.h" #include "mlir/TableGen/Format.h" #include "mlir/TableGen/GenInfo.h" @@ -77,11 +78,11 @@ class PatternEmitter { // Emits C++ statements for matching the `argIndex`-th argument of the given // DAG `tree` as an operand. - void emitOperandMatch(DagNode tree, int argIndex, int depth, int indent); + void emitOperandMatch(DagNode tree, int argIndex, int depth); // Emits C++ statements for matching the `argIndex`-th argument of the given // DAG `tree` as an attribute. - void emitAttributeMatch(DagNode tree, int argIndex, int depth, int indent); + void emitAttributeMatch(DagNode tree, int argIndex, int depth); // Emits C++ for checking a match with a corresponding match failure // diagnostic. @@ -184,7 +185,7 @@ class PatternEmitter { // The next unused ID for newly created values. unsigned nextValueId; - raw_ostream &os; + raw_indented_ostream os; // Format contexts containing placeholder substitutions. FmtContext fmtCtx; @@ -225,8 +226,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { // Skip the operand matching at depth 0 as the pattern rewriter already does. if (depth != 0) { // Skip if there is no defining operation (e.g., arguments to function). - os.indent(indent) << formatv("if (!castedOp{0}) return failure();\n", - depth); + os << formatv("if (!castedOp{0})\n return failure();\n", depth); } if (tree.getNumArgs() != op.getNumArgs()) { PrintFatalError(loc, formatv("op '{0}' argument number mismatch: {1} in " @@ -238,7 +238,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { // If the operand's name is set, set to that variable. auto name = tree.getSymbol(); if (!name.empty()) - os.indent(indent) << formatv("{0} = castedOp{1};\n", name, depth); + os << formatv("{0} = castedOp{1};\n", name, depth); for (int i = 0, e = tree.getNumArgs(); i != e; ++i) { auto opArg = op.getArg(i); @@ -253,24 +253,23 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { PrintFatalError(loc, error); } } - os.indent(indent) << "{\n"; + os << "{\n"; - os.indent(indent + 2) << formatv( + os.indent() << formatv( "auto *op{0} = " "(*castedOp{1}.getODSOperands({2}).begin()).getDefiningOp();\n", depth + 1, depth, i); emitOpMatch(argTree, depth + 1); - os.indent(indent + 2) - << formatv("tblgen_ops[{0}] = op{1};\n", ++opCounter, depth + 1); - os.indent(indent) << "}\n"; + os << formatv("tblgen_ops[{0}] = op{1};\n", ++opCounter, depth + 1); + os.unindent() << "}\n"; continue; } // Next handle DAG leaf: operand or attribute if (opArg.is()) { - emitOperandMatch(tree, i, depth, indent); + emitOperandMatch(tree, i, depth); } else if (opArg.is()) { - emitAttributeMatch(tree, i, depth, indent); + emitAttributeMatch(tree, i, depth); } else { PrintFatalError(loc, "unhandled case when matching op"); } @@ -280,8 +279,7 @@ void PatternEmitter::emitOpMatch(DagNode tree, int depth) { << '\n'); } -void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth, - int indent) { +void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth) { Operator &op = tree.getDialectOp(opMap); auto *operand = op.getArg(argIndex).get(); auto matcher = tree.getArgAsLeaf(argIndex); @@ -328,30 +326,28 @@ void PatternEmitter::emitOperandMatch(DagNode tree, int argIndex, int depth, op.arg_begin(), op.arg_begin() + argIndex, [](const Argument &arg) { return arg.is(); }); - os.indent(indent) << formatv("{0} = castedOp{1}.getODSOperands({2});\n", - name, depth, argIndex - numPrevAttrs); + os << formatv("{0} = castedOp{1}.getODSOperands({2});\n", name, depth, + argIndex - numPrevAttrs); } } -void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth, - int indent) { +void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth) { Operator &op = tree.getDialectOp(opMap); auto *namedAttr = op.getArg(argIndex).get(); const auto &attr = namedAttr->attr; - os.indent(indent) << "{\n"; - indent += 2; - os.indent(indent) << formatv( - "auto tblgen_attr = op{0}->getAttrOfType<{1}>(\"{2}\");" + os << "{\n"; + os.indent() << formatv( + "auto tblgen_attr = op{0}->getAttrOfType<{1}>(\"{2}\"); " "(void)tblgen_attr;\n", depth, attr.getStorageType(), namedAttr->name); // TODO: This should use getter method to avoid duplication. if (attr.hasDefaultValue()) { - os.indent(indent) << "if (!tblgen_attr) tblgen_attr = " - << std::string(tgfmt(attr.getConstBuilderTemplate(), - &fmtCtx, attr.getDefaultValue())) - << ";\n"; + os << "if (!tblgen_attr) tblgen_attr = " + << std::string(tgfmt(attr.getConstBuilderTemplate(), &fmtCtx, + attr.getDefaultValue())) + << ";\n"; } else if (attr.isOptional()) { // For a missing attribute that is optional according to definition, we // should just capture a mlir::Attribute() to signal the missing state. @@ -387,27 +383,20 @@ void PatternEmitter::emitAttributeMatch(DagNode tree, int argIndex, int depth, auto name = tree.getArgName(argIndex); // `$_` is a special symbol to ignore op argument matching. if (!name.empty() && name != "_") { - os.indent(indent) << formatv("{0} = tblgen_attr;\n", name); + os << formatv("{0} = tblgen_attr;\n", name); } - indent -= 2; - os.indent(indent) << "}\n"; + os.unindent() << "}\n"; } void PatternEmitter::emitMatchCheck( int depth, const FmtObjectBase &matchFmt, const llvm::formatv_object_base &failureFmt) { - // {0} The match depth (used to get the operation that failed to match). - // {1} The format for the match string. - // {2} The format for the failure string. - const char *matchStr = R"( - if (!({1})) { - return rewriter.notifyMatchFailure(op{0}, [&](::mlir::Diagnostic &diag) { - diag << {2}; - }); - })"; - os << llvm::formatv(matchStr, depth, matchFmt.str(), failureFmt.str()) - << "\n"; + os << "if (!(" << matchFmt.str() << "))"; + os.scope("{\n", "\n}\n").os + << "return rewriter.notifyMatchFailure(op" << depth + << ", [&](::mlir::Diagnostic &diag) {\n diag << " << failureFmt.str() + << ";\n});"; } void PatternEmitter::emitMatchLogic(DagNode tree) { @@ -491,7 +480,7 @@ void PatternEmitter::emit(StringRef rewriteName) { // Emit RewritePattern for Pattern. auto locs = pattern.getLocation(); - os << formatv("/* Generated from:\n\t{0:$[ instantiating\n\t]}\n*/\n", + os << formatv("/* Generated from:\n {0:$[ instantiating\n ]}\n*/\n", make_range(locs.rbegin(), locs.rend())); os << formatv(R"(struct {0} : public ::mlir::RewritePattern { {0}(::mlir::MLIRContext *context) @@ -509,44 +498,48 @@ void PatternEmitter::emit(StringRef rewriteName) { os << formatv(R"(}, {0}, context) {{})", pattern.getBenefit()) << "\n"; // Emit matchAndRewrite() function. - os << R"( - ::mlir::LogicalResult - matchAndRewrite(::mlir::Operation *op0, - ::mlir::PatternRewriter &rewriter) const override { -)"; - - // Register all symbols bound in the source pattern. - pattern.collectSourcePatternBoundSymbols(symbolInfoMap); - - LLVM_DEBUG( - llvm::dbgs() << "start creating local variables for capturing matches\n"); - os.indent(4) << "// Variables for capturing values and attributes used for " - "creating ops\n"; - // Create local variables for storing the arguments and results bound - // to symbols. - for (const auto &symbolInfoPair : symbolInfoMap) { - StringRef symbol = symbolInfoPair.getKey(); - auto &info = symbolInfoPair.getValue(); - os.indent(4) << info.getVarDecl(symbol); + { + auto classScope = os.scope(); + os.reindent(R"( + ::mlir::LogicalResult matchAndRewrite(::mlir::Operation *op0, + ::mlir::PatternRewriter &rewriter) const override {)") + << '\n'; + { + auto functionScope = os.scope(); + + // Register all symbols bound in the source pattern. + pattern.collectSourcePatternBoundSymbols(symbolInfoMap); + + LLVM_DEBUG(llvm::dbgs() + << "start creating local variables for capturing matches\n"); + os << "// Variables for capturing values and attributes used while " + "creating ops\n"; + // Create local variables for storing the arguments and results bound + // to symbols. + for (const auto &symbolInfoPair : symbolInfoMap) { + StringRef symbol = symbolInfoPair.getKey(); + auto &info = symbolInfoPair.getValue(); + os << info.getVarDecl(symbol); + } + // TODO: capture ops with consistent numbering so that it can be + // reused for fused loc. + os << formatv("::mlir::Operation *tblgen_ops[{0}];\n\n", + pattern.getSourcePattern().getNumOps()); + LLVM_DEBUG(llvm::dbgs() + << "done creating local variables for capturing matches\n"); + + os << "// Match\n"; + os << "tblgen_ops[0] = op0;\n"; + emitMatchLogic(sourceTree); + + os << "\n// Rewrite\n"; + emitRewriteLogic(); + + os << "return success();\n"; + } + os << "};\n"; } - // TODO: capture ops with consistent numbering so that it can be - // reused for fused loc. - os.indent(4) << formatv("::mlir::Operation *tblgen_ops[{0}];\n\n", - pattern.getSourcePattern().getNumOps()); - LLVM_DEBUG( - llvm::dbgs() << "done creating local variables for capturing matches\n"); - - os.indent(4) << "// Match\n"; - os.indent(4) << "tblgen_ops[0] = op0;\n"; - emitMatchLogic(sourceTree); - os << "\n"; - - os.indent(4) << "// Rewrite\n"; - emitRewriteLogic(); - - os.indent(4) << "return success();\n"; - os << " };\n"; - os << "};\n"; + os << "};\n\n"; } void PatternEmitter::emitRewriteLogic() { @@ -586,7 +579,7 @@ void PatternEmitter::emitRewriteLogic() { PrintFatalError(loc, error); } - os.indent(4) << "auto odsLoc = rewriter.getFusedLoc({"; + os << "auto odsLoc = rewriter.getFusedLoc({"; for (int i = 0, e = pattern.getSourcePattern().getNumOps(); i != e; ++i) { os << (i ? ", " : "") << "tblgen_ops[" << i << "]->getLoc()"; } @@ -601,22 +594,21 @@ void PatternEmitter::emitRewriteLogic() { // we are handling auxiliary patterns so we want the side effect even if // NativeCodeCall is not replacing matched root op's results. if (resultTree.isNativeCodeCall()) - os.indent(4) << val << ";\n"; + os << val << ";\n"; } if (numExpectedResults == 0) { assert(replStartIndex >= numResultPatterns && "invalid auxiliary vs. replacement pattern division!"); // No result to replace. Just erase the op. - os.indent(4) << "rewriter.eraseOp(op0);\n"; + os << "rewriter.eraseOp(op0);\n"; } else { // Process replacement result patterns. - os.indent(4) - << "::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values;\n"; + os << "::llvm::SmallVector<::mlir::Value, 4> tblgen_repl_values;\n"; for (int i = replStartIndex; i < numResultPatterns; ++i) { DagNode resultTree = pattern.getResultPattern(i); auto val = handleResultPattern(resultTree, offsets[i], 0); - os.indent(4) << "\n"; + os << "\n"; // Resolve each symbol for all range use so that we can loop over them. // We need an explicit cast to `SmallVector` to capture the cases where // `{0}` resolves to an `Operation::result_range` as well as cases that @@ -625,12 +617,11 @@ void PatternEmitter::emitRewriteLogic() { // TODO: Revisit the need for materializing a vector. os << symbolInfoMap.getAllRangeUse( val, - " for (auto v : ::llvm::SmallVector<::mlir::Value, 4>{ {0} }) {{ " - "tblgen_repl_values.push_back(v); }", + "for (auto v: ::llvm::SmallVector<::mlir::Value, 4>{ {0} }) {{\n" + " tblgen_repl_values.push_back(v);\n}\n", "\n"); } - os.indent(4) << "\n"; - os.indent(4) << "rewriter.replaceOp(op0, tblgen_repl_values);\n"; + os << "\nrewriter.replaceOp(op0, tblgen_repl_values);\n"; } LLVM_DEBUG(llvm::dbgs() << "--- done emitting rewrite logic ---\n"); @@ -879,9 +870,8 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, } // Create the local variable for this op. - os.indent(4) << formatv("{0} {1};\n", resultOp.getQualCppClassName(), - valuePackName); - os.indent(4) << "{\n"; + os << formatv("{0} {1};\n{{\n", resultOp.getQualCppClassName(), + valuePackName); // Right now ODS don't have general type inference support. Except a few // special cases listed below, DRR needs to supply types for all results @@ -900,10 +890,9 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, createAggregateLocalVarsForOpArgs(tree, childNodeNames); // Then create the op. - os.indent(6) << formatv( - "{0} = rewriter.create<{1}>({2}, tblgen_values, tblgen_attrs);\n", + os.scope("", "\n}\n").os << formatv( + "{0} = rewriter.create<{1}>({2}, tblgen_values, tblgen_attrs);", valuePackName, resultOp.getQualCppClassName(), locToUse); - os.indent(4) << "}\n"; return resultValue; } @@ -920,11 +909,10 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // aggregate-parameter builders. createSeparateLocalVarsForOpArgs(tree, childNodeNames); - os.indent(6) << formatv("{0} = rewriter.create<{1}>({2}", valuePackName, - resultOp.getQualCppClassName(), locToUse); + os.scope().os << formatv("{0} = rewriter.create<{1}>({2}", valuePackName, + resultOp.getQualCppClassName(), locToUse); supplyValuesForOpArgs(tree, childNodeNames); - os << "\n );\n"; - os.indent(4) << "}\n"; + os << "\n );\n}\n"; return resultValue; } @@ -938,20 +926,19 @@ std::string PatternEmitter::handleOpCreation(DagNode tree, int resultIndex, // Then prepare the result types. We need to specify the types for all // results. - os.indent(6) << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " - "(void)tblgen_types;\n"); + os.indent() << formatv("::mlir::SmallVector<::mlir::Type, 4> tblgen_types; " + "(void)tblgen_types;\n"); int numResults = resultOp.getNumResults(); if (numResults != 0) { for (int i = 0; i < numResults; ++i) - os.indent(6) << formatv("for (auto v : castedOp0.getODSResults({0})) {{" - "tblgen_types.push_back(v.getType()); }\n", - resultIndex + i); + os << formatv("for (auto v: castedOp0.getODSResults({0})) {{\n" + " tblgen_types.push_back(v.getType());\n}\n", + resultIndex + i); } - os.indent(6) << formatv("{0} = rewriter.create<{1}>({2}, tblgen_types, " - "tblgen_values, tblgen_attrs);\n", - valuePackName, resultOp.getQualCppClassName(), - locToUse); - os.indent(4) << "}\n"; + os << formatv("{0} = rewriter.create<{1}>({2}, tblgen_types, " + "tblgen_values, tblgen_attrs);\n", + valuePackName, resultOp.getQualCppClassName(), locToUse); + os.unindent() << "}\n"; return resultValue; } @@ -968,16 +955,15 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) { const auto *operand = resultOp.getArg(argIndex).dyn_cast(); - if (!operand) { - // We do not need special handling for attributes. + // We do not need special handling for attributes. + if (!operand) continue; - } + raw_indented_ostream::DelimitedScope scope(os); std::string varName; if (operand->isVariadic()) { varName = std::string(formatv("tblgen_values_{0}", valueIndex++)); - os.indent(6) << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", - varName); + os << formatv("::mlir::SmallVector<::mlir::Value, 4> {0};\n", varName); std::string range; if (node.isNestedDagArg(argIndex)) { range = childNodeNames[argIndex]; @@ -987,11 +973,11 @@ void PatternEmitter::createSeparateLocalVarsForOpArgs( // Resolve the symbol for all range use so that we have a uniform way of // capturing the values. range = symbolInfoMap.getValueAndRangeUse(range); - os.indent(6) << formatv("for (auto v : {0}) {1}.push_back(v);\n", range, - varName); + os << formatv("for (auto v: {0}) {{\n {1}.push_back(v);\n}\n", range, + varName); } else { varName = std::string(formatv("tblgen_value_{0}", valueIndex++)); - os.indent(6) << formatv("::mlir::Value {0} = ", varName); + os << formatv("::mlir::Value {0} = ", varName); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse(childNodeNames[argIndex]); } else { @@ -1019,7 +1005,7 @@ void PatternEmitter::supplyValuesForOpArgs( for (int argIndex = 0, numOpArgs = resultOp.getNumArgs(); argIndex != numOpArgs; ++argIndex) { // Start each argument on its own line. - (os << ",\n").indent(8); + os << ",\n "; Argument opArg = resultOp.getArg(argIndex); // Handle the case of operand first. @@ -1060,14 +1046,16 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( DagNode node, const ChildNodeIndexNameMap &childNodeNames) { Operator &resultOp = node.getDialectOp(opMap); - os.indent(6) << formatv("::mlir::SmallVector<::mlir::Value, 4> " - "tblgen_values; (void)tblgen_values;\n"); - os.indent(6) << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " - "tblgen_attrs; (void)tblgen_attrs;\n"); + auto scope = os.scope(); + os << formatv("::mlir::SmallVector<::mlir::Value, 4> " + "tblgen_values; (void)tblgen_values;\n"); + os << formatv("::mlir::SmallVector<::mlir::NamedAttribute, 4> " + "tblgen_attrs; (void)tblgen_attrs;\n"); const char *addAttrCmd = - "if (auto tmpAttr = {1}) " - "tblgen_attrs.emplace_back(rewriter.getIdentifier(\"{0}\"), tmpAttr);\n"; + "if (auto tmpAttr = {1}) {\n" + " tblgen_attrs.emplace_back(rewriter.getIdentifier(\"{0}\"), " + "tmpAttr);\n}\n"; for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) { if (resultOp.getArg(argIndex).is()) { // The argument in the op definition. @@ -1076,14 +1064,14 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( if (!subTree.isNativeCodeCall()) PrintFatalError(loc, "only NativeCodeCall allowed in nested dag node " "for creating attribute"); - os.indent(6) << formatv(addAttrCmd, opArgName, - handleReplaceWithNativeCodeCall(subTree)); + os << formatv(addAttrCmd, opArgName, + handleReplaceWithNativeCodeCall(subTree)); } else { auto leaf = node.getArgAsLeaf(argIndex); // The argument in the result DAG pattern. auto patArgName = node.getArgName(argIndex); - os.indent(6) << formatv(addAttrCmd, opArgName, - handleOpArgument(leaf, patArgName)); + os << formatv(addAttrCmd, opArgName, + handleOpArgument(leaf, patArgName)); } continue; } @@ -1101,10 +1089,10 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( // Resolve the symbol for all range use so that we have a uniform way of // capturing the values. range = symbolInfoMap.getValueAndRangeUse(range); - os.indent(6) << formatv( - "for (auto v : {0}) tblgen_values.push_back(v);\n", range); + os << formatv("for (auto v: {0}) {{\n tblgen_values.push_back(v);\n}\n", + range); } else { - os.indent(6) << formatv("tblgen_values.push_back(", varName); + os << formatv("tblgen_values.push_back(", varName); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse( childNodeNames.lookup(argIndex)); diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt new file mode 100644 index 0000000000000..cec3c2d573860 --- /dev/null +++ b/mlir/unittests/Support/CMakeLists.txt @@ -0,0 +1,6 @@ +add_mlir_unittest(MLIRSupportTests + IndentedOstreamTest.cpp +) + +target_link_libraries(MLIRSupportTests + PRIVATE MLIRSupportIndentedOstream MLIRSupport) diff --git a/mlir/unittests/Support/IndentedOstreamTest.cpp b/mlir/unittests/Support/IndentedOstreamTest.cpp new file mode 100644 index 0000000000000..0271eb73e8897 --- /dev/null +++ b/mlir/unittests/Support/IndentedOstreamTest.cpp @@ -0,0 +1,110 @@ +//===- IndentedOstreamTest.cpp - Indented raw ostream Tests ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/IndentedOstream.h" +#include "gmock/gmock.h" + +using namespace mlir; +using ::testing::StrEq; + +TEST(FormatTest, SingleLine) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros << 10; + ros.flush(); + EXPECT_THAT(os.str(), StrEq("10")); +} + +TEST(FormatTest, SimpleMultiLine) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros << "a"; + ros << "b"; + ros << "\n"; + ros << "c"; + ros << "\n"; + ros.flush(); + EXPECT_THAT(os.str(), StrEq("ab\nc\n")); +} + +TEST(FormatTest, SimpleMultiLineIndent) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros.indent(2) << "a"; + ros.indent(4) << "b"; + ros << "\n"; + ros << "c"; + ros << "\n"; + ros.flush(); + EXPECT_THAT(os.str(), StrEq(" a b\n c\n")); +} + +TEST(FormatTest, SingleRegion) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + ros << "before\n"; + { + raw_indented_ostream::DelimitedScope scope(ros); + ros << "inside " << 10; + ros << "\n two\n"; + { + raw_indented_ostream::DelimitedScope scope(ros, "{\n", "\n}\n"); + ros << "inner inner"; + } + } + ros << "after"; + ros.flush(); + const auto *expected = + R"(before + inside 10 + two + { + inner inner + } +after)"; + EXPECT_THAT(os.str(), StrEq(expected)); + + // Repeat the above with inline form. + str.clear(); + ros << "before\n"; + ros.scope().os << "inside " << 10 << "\n two\n"; + ros.scope().os.scope("{\n", "\n}\n").os << "inner inner"; + ros << "after"; + ros.flush(); + EXPECT_THAT(os.str(), StrEq(expected)); +} + +TEST(FormatTest, Reindent) { + std::string str; + llvm::raw_string_ostream os(str); + raw_indented_ostream ros(os); + + // String to print with some additional empty lines at the start and lines + // with just spaces. + const auto *desc = R"( + + + First line + second line + + + )"; + ros.reindent(desc); + ros.flush(); + const auto *expected = + R"(First line + second line + + +)"; + EXPECT_THAT(os.str(), StrEq(expected)); +} From adccc0bfa301005367d6b89a3aacc07ef0166e64 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Oct 2020 16:55:18 -0700 Subject: [PATCH 484/544] [X86] Add X86ISD opcodes for the Key Locker AESENC*KL and AESDEC*KL instructions Instead of emitting MachineSDNodes during lowering, emit X86ISD opcodes. These opcodes will either be selected by tablegen patterns or custom selection code. Emitting MachineSDNodes during lowering is uncommon so this makes things more consistent. It also allows selectAddr to be called to perform address matching during instruction selection. I had trouble getting tablegen to accept XMM0-XMM7 as results in an isel pattern for the WIDE instructions so I had to use custom instruction selection. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 63 +++++++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 107 ++++++------------ llvm/lib/Target/X86/X86ISelLowering.h | 10 ++ llvm/lib/Target/X86/X86InstrInfo.td | 14 +++ llvm/lib/Target/X86/X86InstrKL.td | 16 ++- llvm/test/CodeGen/X86/keylocker-intrinsics.ll | 100 ++++++++++++++++ 6 files changed, 234 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 3b5a29ef31fcf..0d80bde5f7173 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2448,6 +2448,14 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, Parent->getOpcode() != X86ISD::TLSCALL && // Fixme Parent->getOpcode() != X86ISD::ENQCMD && // Fixme Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme + Parent->getOpcode() != X86ISD::AESENC128KL && // Fixme + Parent->getOpcode() != X86ISD::AESDEC128KL && // Fixme + Parent->getOpcode() != X86ISD::AESENC256KL && // Fixme + Parent->getOpcode() != X86ISD::AESDEC256KL && // Fixme + Parent->getOpcode() != X86ISD::AESENCWIDE128KL && // Fixme + Parent->getOpcode() != X86ISD::AESDECWIDE128KL && // Fixme + Parent->getOpcode() != X86ISD::AESENCWIDE256KL && // Fixme + Parent->getOpcode() != X86ISD::AESDECWIDE256KL && // Fixme Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = @@ -5725,6 +5733,61 @@ void X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->RemoveDeadNode(Node); return; } + case X86ISD::AESENCWIDE128KL: + case X86ISD::AESDECWIDE128KL: + case X86ISD::AESENCWIDE256KL: + case X86ISD::AESDECWIDE256KL: { + unsigned Opcode; + switch (Node->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + case X86ISD::AESENCWIDE128KL: + Opcode = X86::AESENCWIDE128KL; + break; + case X86ISD::AESDECWIDE128KL: + Opcode = X86::AESDECWIDE128KL; + break; + case X86ISD::AESENCWIDE256KL: + Opcode = X86::AESENCWIDE256KL; + break; + case X86ISD::AESDECWIDE256KL: + Opcode = X86::AESDECWIDE256KL; + break; + } + + SDValue Chain = Node->getOperand(0); + SDValue Addr = Node->getOperand(1); + + SDValue Base, Scale, Index, Disp, Segment; + if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment)) + break; + + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2), + SDValue()); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3), + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4), + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5), + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6), + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7), + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8), + Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), + Chain.getValue(1)); + + SDVTList VTs = CurDAG->getVTList( + {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, + MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other}); + SDNode *Res = CurDAG->getMachineNode( + Opcode, dl, VTs, + {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); + ReplaceNode(Node, Res); + return; + } } SelectCode(Node); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 935fab44e7c1a..e526a1dd58eb9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26032,118 +26032,73 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, case Intrinsic::x86_aesenc256kl: case Intrinsic::x86_aesdec256kl: { SDLoc DL(Op); - SDVTList VTs = DAG.getVTList(MVT::v16i8, MVT::Other, MVT::Glue); + SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other); SDValue Chain = Op.getOperand(0); unsigned Opcode; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); case Intrinsic::x86_aesenc128kl: - Opcode = X86::AESENC128KL; + Opcode = X86ISD::AESENC128KL; break; case Intrinsic::x86_aesdec128kl: - Opcode = X86::AESDEC128KL; + Opcode = X86ISD::AESDEC128KL; break; case Intrinsic::x86_aesenc256kl: - Opcode = X86::AESENC256KL; + Opcode = X86ISD::AESENC256KL; break; case Intrinsic::x86_aesdec256kl: - Opcode = X86::AESDEC256KL; + Opcode = X86ISD::AESDEC256KL; break; } - SDValue XMM = Op.getOperand(2); - SDValue Base = Op.getOperand(3); - SDValue Index = DAG.getRegister(0, MVT::i32); - SDValue Scale = DAG.getTargetConstant(1, DL, MVT::i8); - SDValue Disp = DAG.getTargetConstant(0, DL, MVT::i32); - SDValue Segment = DAG.getRegister(0, MVT::i32); - - SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, {XMM, Base, Scale, Index, - Disp, Segment, Chain}); - Chain = SDValue(Res, 1); - SDValue EFLAGS = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, - SDValue(Res, 2)); - SDValue ZF = getSETCC(X86::COND_E, EFLAGS.getValue(0), DL, DAG); + SDValue Operation = DAG.getNode(Opcode, DL, VTs, Chain, Op.getOperand(2), + Op.getOperand(3)); + SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG); return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), - {ZF, SDValue(Res, 0), EFLAGS.getValue(1)}); + {ZF, Operation.getValue(0), Operation.getValue(2)}); } case Intrinsic::x86_aesencwide128kl: case Intrinsic::x86_aesdecwide128kl: case Intrinsic::x86_aesencwide256kl: case Intrinsic::x86_aesdecwide256kl: { SDLoc DL(Op); - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDVTList VTs = DAG.getVTList( + {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, + MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other}); SDValue Chain = Op.getOperand(0); unsigned Opcode; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); case Intrinsic::x86_aesencwide128kl: - Opcode = X86::AESENCWIDE128KL; + Opcode = X86ISD::AESENCWIDE128KL; break; case Intrinsic::x86_aesdecwide128kl: - Opcode = X86::AESDECWIDE128KL; + Opcode = X86ISD::AESDECWIDE128KL; break; case Intrinsic::x86_aesencwide256kl: - Opcode = X86::AESENCWIDE256KL; + Opcode = X86ISD::AESENCWIDE256KL; break; case Intrinsic::x86_aesdecwide256kl: - Opcode = X86::AESDECWIDE256KL; + Opcode = X86ISD::AESDECWIDE256KL; break; } - SDValue Base = Op.getOperand(2); - SDValue Index = DAG.getRegister(0, MVT::i32); - SDValue Scale = DAG.getTargetConstant(1, DL, MVT::i8); - SDValue Disp = DAG.getTargetConstant(0, DL, MVT::i32); - SDValue Segment = DAG.getRegister(0, MVT::i32); + SDValue Operation = DAG.getNode( + Opcode, DL, VTs, + {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), + Op.getOperand(5), Op.getOperand(6), Op.getOperand(7), + Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)}); + SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG); - Chain = DAG.getCopyToReg(Chain, DL, X86::XMM0, Op->getOperand(3), - SDValue()); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM1, - Op->getOperand(4), Chain.getValue(1)); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM2, - Op->getOperand(5), Chain.getValue(1)); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM3, - Op->getOperand(6), Chain.getValue(1)); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM4, - Op->getOperand(7), Chain.getValue(1)); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM5, - Op->getOperand(8), Chain.getValue(1)); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM6, - Op->getOperand(9), Chain.getValue(1)); - Chain = DAG.getCopyToReg(Chain.getValue(0), DL, X86::XMM7, - Op->getOperand(10),Chain.getValue(1)); - - SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, {Base, Scale, Index, - Disp, Segment, Chain, - Chain.getValue(1)}); - - Chain = SDValue(Res, 0); - SDValue EFLAGS = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, - SDValue(Res, 1)); - SDValue ZF = getSETCC(X86::COND_E, EFLAGS.getValue(0), DL, DAG); - SDValue XMM0 = DAG.getCopyFromReg(EFLAGS.getValue(1), DL, X86::XMM0, - MVT::v16i8, EFLAGS.getValue(2)); - SDValue XMM1 = DAG.getCopyFromReg(XMM0.getValue(1), DL, X86::XMM1, - MVT::v16i8, XMM0.getValue(2)); - SDValue XMM2 = DAG.getCopyFromReg(XMM1.getValue(1), DL, X86::XMM2, - MVT::v16i8, XMM1.getValue(2)); - SDValue XMM3 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM3, - MVT::v16i8, XMM2.getValue(2)); - SDValue XMM4 = DAG.getCopyFromReg(XMM3.getValue(1), DL, X86::XMM4, - MVT::v16i8, XMM3.getValue(2)); - SDValue XMM5 = DAG.getCopyFromReg(XMM4.getValue(1), DL, X86::XMM5, - MVT::v16i8, XMM4.getValue(2)); - SDValue XMM6 = DAG.getCopyFromReg(XMM5.getValue(1), DL, X86::XMM6, - MVT::v16i8, XMM5.getValue(2)); - SDValue XMM7 = DAG.getCopyFromReg(XMM6.getValue(1), DL, X86::XMM7, - MVT::v16i8, XMM6.getValue(2)); return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), - {ZF, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM7.getValue(1)}); + {ZF, Operation.getValue(1), Operation.getValue(2), + Operation.getValue(3), Operation.getValue(4), + Operation.getValue(5), Operation.getValue(6), + Operation.getValue(7), Operation.getValue(8), + Operation.getValue(9)}); } } return SDValue(); @@ -31167,6 +31122,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ENQCMD) NODE_NAME_CASE(ENQCMDS) NODE_NAME_CASE(VP2INTERSECT) + NODE_NAME_CASE(AESENC128KL) + NODE_NAME_CASE(AESDEC128KL) + NODE_NAME_CASE(AESENC256KL) + NODE_NAME_CASE(AESDEC256KL) + NODE_NAME_CASE(AESENCWIDE128KL) + NODE_NAME_CASE(AESDECWIDE128KL) + NODE_NAME_CASE(AESENCWIDE256KL) + NODE_NAME_CASE(AESDECWIDE256KL) } return nullptr; #undef NODE_NAME_CASE diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f8de2f7d0e79b..9f231be78191f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -713,6 +713,16 @@ namespace llvm { // Mwaitx builtin is lowered to this if the base pointer needs saving. MWAITX_DAG, + // Key locker nodes that produce flags. + AESENC128KL, + AESDEC128KL, + AESENC256KL, + AESDEC256KL, + AESENCWIDE128KL, + AESDECWIDE128KL, + AESENCWIDE256KL, + AESDECWIDE256KL, + /// X86 strict FP compare instructions. STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCMPS, diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index d13ba5dbc0eb0..3a3d141854a1c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -135,6 +135,11 @@ def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>; +def SDT_X86AESENCDECKL : SDTypeProfile<2, 2, [SDTCisVT<0, v2i64>, + SDTCisVT<1, i32>, + SDTCisVT<2, v2i64>, + SDTCisPtrTy<3>]>; + def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, [SDNPHasChain,SDNPSideEffect]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, @@ -331,6 +336,15 @@ def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD, def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD, [SDNPHasChain, SDNPSideEffect]>; +def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL, + [SDNPHasChain, SDNPSideEffect]>; +def X86aesdec128kl : SDNode<"X86ISD::AESDEC128KL", SDT_X86AESENCDECKL, + [SDNPHasChain, SDNPSideEffect]>; +def X86aesenc256kl : SDNode<"X86ISD::AESENC256KL", SDT_X86AESENCDECKL, + [SDNPHasChain, SDNPSideEffect]>; +def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td index 77e011fe14d63..0c05c7a0ab2cd 100644 --- a/llvm/lib/Target/X86/X86InstrKL.td +++ b/llvm/lib/Target/X86/X86InstrKL.td @@ -36,16 +36,24 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { let Constraints = "$src1 = $dst", Defs = [EFLAGS] in { def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), - "aesenc128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + "aesenc128kl\t{$src2, $src1|$src1, $src2}", + [(set VR128:$dst, EFLAGS, + (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS; def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), - "aesdec128kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + "aesdec128kl\t{$src2, $src1|$src1, $src2}", + [(set VR128:$dst, EFLAGS, + (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS; def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), - "aesenc256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + "aesenc256kl\t{$src2, $src1|$src1, $src2}", + [(set VR128:$dst, EFLAGS, + (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS; def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2), - "aesdec256kl\t{$src2, $src1|$src1, $src2}", []>, T8XS; + "aesdec256kl\t{$src2, $src1|$src1, $src2}", + [(set VR128:$dst, EFLAGS, + (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS; } } // SchedRW, Predicates diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll index d577ffd12e086..e48affb80d5f1 100644 --- a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll @@ -540,3 +540,103 @@ entry: %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 ret i8 %9 } + +; Tests to make sure we can select an appropriate addressing mode for a global. + +@foo = external global [64 x i8] + +define i8 @test_mm_aesenc256kl_u8_global(<2 x i64> %data, <2 x i64>* %out) { +; X64-LABEL: test_mm_aesenc256kl_u8_global: +; X64: # %bb.0: # %entry +; X64-NEXT: aesenc256kl {{.*}}(%rip), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rdi) +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesenc256kl_u8_global: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: aesenc256kl foo, %xmm0 +; X32-NEXT: sete %al +; X32-NEXT: vmovaps %xmm0, (%ecx) +; X32-NEXT: retl +entry: + %h = bitcast [64 x i8]* @foo to i8* + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, i8* %h) + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define i8 @test_mm_aesdecwide256kl_u8_global(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind { +; X64-LABEL: test_mm_aesdecwide256kl_u8_global: +; X64: # %bb.0: # %entry +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: aesdecwide256kl {{.*}}(%rip) +; X64-NEXT: sete %al +; X64-NEXT: movaps %xmm0, (%rdi) +; X64-NEXT: movaps %xmm1, (%rsi) +; X64-NEXT: movaps %xmm1, (%rdx) +; X64-NEXT: movaps %xmm1, (%rcx) +; X64-NEXT: movaps %xmm1, (%r8) +; X64-NEXT: movaps %xmm1, (%r9) +; X64-NEXT: movaps %xmm1, (%r11) +; X64-NEXT: movaps %xmm1, (%r10) +; X64-NEXT: retq +; +; X32-LABEL: test_mm_aesdecwide256kl_u8_global: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebp +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $16, %esp +; X32-NEXT: movl 88(%ebp), %eax +; X32-NEXT: vmovaps 8(%ebp), %xmm3 +; X32-NEXT: vmovaps 24(%ebp), %xmm4 +; X32-NEXT: vmovaps 40(%ebp), %xmm5 +; X32-NEXT: vmovaps 56(%ebp), %xmm6 +; X32-NEXT: vmovaps 72(%ebp), %xmm7 +; X32-NEXT: aesdecwide256kl foo +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: movl 92(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 96(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 100(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 104(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 108(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 112(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: movl 116(%ebp), %eax +; X32-NEXT: vmovaps %xmm1, (%eax) +; X32-NEXT: sete %al +; X32-NEXT: movl %ebp, %esp +; X32-NEXT: popl %ebp +; X32-NEXT: retl +entry: + %p = bitcast [64 x i8]* @foo to i8* + %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) + %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %out0 + %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + store <2 x i64> %2, <2 x i64>* %out1 + %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + store <2 x i64> %2, <2 x i64>* %out2 + %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + store <2 x i64> %2, <2 x i64>* %out3 + %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + store <2 x i64> %2, <2 x i64>* %out4 + %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + store <2 x i64> %2, <2 x i64>* %out5 + %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + store <2 x i64> %2, <2 x i64>* %out6 + %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8 + store <2 x i64> %2, <2 x i64>* %out7 + %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i8 %9 +} From e9b87f43bde8b5f0d8a79c5884fdce639b12e0ca Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Mon, 3 Aug 2020 11:24:15 -0700 Subject: [PATCH 485/544] [RFC] Factor out repetitive cmake patterns for llvm-style projects New projects (particularly out of tree) have a tendency to hijack the existing llvm configuration options and build targets (add_llvm_library, add_llvm_tool). This can lead to some confusion. 1) When querying a configuration variable, do we care about how LLVM was configured, or how these options were configured for the out of tree project? 2) LLVM has lots of defaults, which are easy to miss (e.g. LLVM_BUILD_TOOLS=ON). These options all need to be duplicated in the CMakeLists.txt for the project. In addition, with LLVM Incubators coming online, we need better ways for these incubators to do things the "LLVM way" without alot of futzing. Ideally, this would happen in a way that eases importing into the LLVM monorepo when projects mature. This patch creates some generic infrastructure in llvm/cmake/modules and refactors MLIR to use this infrastructure. This should expand to include add_xxx_library, which is by far the most complicated bit of building a project correctly, since it has to deal with lots of shared library configuration bits. (MLIR currently hijacks the LLVM infrastructure for building libMLIR.so, so this needs to get refactored anyway.) Differential Revision: https://reviews.llvm.org/D85140 --- llvm/cmake/modules/LLVMProjectOptions.cmake | 68 +++++++++++ llvm/cmake/modules/LLVMProjectTargets.cmake | 109 ++++++++++++++++++ mlir/CMakeLists.txt | 31 ++--- mlir/cmake/modules/AddMLIR.cmake | 9 +- mlir/examples/standalone/CMakeLists.txt | 9 ++ .../standalone/standalone-opt/CMakeLists.txt | 2 +- .../standalone-translate/CMakeLists.txt | 2 +- mlir/examples/toy/CMakeLists.txt | 2 +- mlir/test/Examples/standalone/test.toy | 1 + mlir/tools/mlir-cpu-runner/CMakeLists.txt | 2 +- mlir/tools/mlir-cuda-runner/CMakeLists.txt | 2 +- mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt | 2 +- mlir/tools/mlir-opt/CMakeLists.txt | 2 +- mlir/tools/mlir-reduce/CMakeLists.txt | 2 +- mlir/tools/mlir-rocm-runner/CMakeLists.txt | 2 +- mlir/tools/mlir-translate/CMakeLists.txt | 2 +- mlir/tools/mlir-vulkan-runner/CMakeLists.txt | 2 +- 17 files changed, 221 insertions(+), 28 deletions(-) create mode 100644 llvm/cmake/modules/LLVMProjectOptions.cmake create mode 100644 llvm/cmake/modules/LLVMProjectTargets.cmake diff --git a/llvm/cmake/modules/LLVMProjectOptions.cmake b/llvm/cmake/modules/LLVMProjectOptions.cmake new file mode 100644 index 0000000000000..ce466953280ed --- /dev/null +++ b/llvm/cmake/modules/LLVMProjectOptions.cmake @@ -0,0 +1,68 @@ +# LLVM-style projects generally have the same directory structure. This file +# provides some bolierplate cmake support for projects that supports this +# directory structure. Note that generally speaking, projects should prefer +# to use their own rules for these rather than relying on the core llvm build +# targets. + +# Generally name should be lower case. +function(add_llvm_project_options name) + string(TOUPPER "${name}" uppername) + + # Define options to control the inclusion and default build behavior for + # components which may not strictly be necessary (tools, examples, and tests). + # + # This is primarily to support building smaller or faster project files. + option(${uppername}_INCLUDE_TOOLS + "Generate build targets for the ${uppername} tools." + ${LLVM_INCLUDE_TOOLS}) + option(${uppername}_BUILD_TOOLS + "Build the ${uppername} tools. If OFF, just generate build targets." + ${LLVM_BUILD_TOOLS}) + + option(${uppername}_INCLUDE_UTILS + "Generate build targets for the ${uppername} utils." + ${LLVM_INCLUDE_UTILS}) + option(${uppername}_BUILD_UTILS + "Build ${uppername} utility binaries. If OFF, just generate build targets." + ${LLVM_BUILD_UTILS}) + option(${uppername}_INSTALL_UTILS + "Include utility binaries in the 'install' target." + ${LLVM_INSTALL_UTILS}) + + # i.e. Don't install headers, for instance. + option(${uppername}_INSTALL_TOOLCHAIN_ONLY + "Only include toolchain files in the 'install' target." + ${LLVM_INSTALL_TOOLCHAIN_ONLY}) + + option(${uppername}_BUILD_EXAMPLES + "Build the ${uppername} example programs. If OFF, just generate build targets." + ${LLVM_BUILD_EXAMPLES}) + option(${uppername}_INCLUDE_EXAMPLES + "Generate build targets for the ${uppername} examples" + ${LLVM_INCLUDE_EXAMPLES}) + if(${uppername}_BUILD_EXAMPLES) + add_definitions(-DBUILD_EXAMPLES) + endif(${uppername}_BUILD_EXAMPLES) + + option(${uppername}_BUILD_TESTS + "Build ${uppername} unit tests. If OFF, just generate build targets." + ${LLVM_BUILD_TESTS}) + option(${uppername}_INCLUDE_TESTS + "Generate build targets for the ${uppername} unit tests." + ${LLVM_INCLUDE_TESTS}) + if (${uppername}_INCLUDE_TESTS) + add_definitions(-D${uppername}_INCLUDE_TESTS) + endif() + + option(${uppername}_INCLUDE_INTEGRATION_TESTS + "Generate build targets for the ${uppername} integration tests." + ${LLVM_INCLUDE_INTEGRATION_TESTS}) + if (${uppername}_INCLUDE_INTEGRATION_TESTS) + add_definitions(-D${uppername}_INCLUDE_INTEGRATION_TESTS) + endif() + + option(${uppername}_INCLUDE_DOCS + "Generate build targets for the ${uppername} docs." + ${LLVM_INCLUDE_DOCS}) + +endfunction(add_llvm_project_options) diff --git a/llvm/cmake/modules/LLVMProjectTargets.cmake b/llvm/cmake/modules/LLVMProjectTargets.cmake new file mode 100644 index 0000000000000..4e73706d14777 --- /dev/null +++ b/llvm/cmake/modules/LLVMProjectTargets.cmake @@ -0,0 +1,109 @@ +# For project foo, this function generates: +# add_foo_tool(name) (An executable installed by default) +# add_foo_utility(name) (An executable *not* installed by default) +# add_foo_example(name) (An executable which is built, but never installed) +# add_foo_example_library(name) (A library to go along with an example) + +# It also assumes the following configuration environment variables +# (see LLVMProjectOptions.cmake) +# FOO_TOOLS_INSTALL_DIR +# FOO_BUILD_TOOLS +# FOO_BUILD_UTILS +# FOO_INSTALL_UTILS +# FOO_BUILD_EXAMPLES +# FOO_HAS_EXPORTS +# FOO_INSTALL_TOOLCHAIN_ONLY + +function(add_llvm_project_targets projectname) + string(TOUPPER "${name}" upperprojectname) + + macro(add_${projectname}_tool name) + if( NOT ${upperprojectname}_BUILD_TOOLS ) + set(EXCLUDE_FROM_ALL ON) + endif() + add_llvm_executable(${name} ${ARGN}) + + if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT ${upperprojectname}_INSTALL_TOOLCHAIN_ONLY) + if( ${upperprojectname}_BUILD_TOOLS ) + set(export_to_${projectname}exports) + if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR + NOT LLVM_DISTRIBUTION_COMPONENTS) + set(export_to_${projectname}exports EXPORT ${upperprojectname}Exports) + set_property(GLOBAL PROPERTY ${upperprojectname}_HAS_EXPORTS True) + endif() + + install(TARGETS ${name} + ${export_to_${projectname}exports} + RUNTIME DESTINATION ${${upperprojectname}_TOOLS_INSTALL_DIR} + COMPONENT ${name}) + + if (NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-${name} + DEPENDS ${name} + COMPONENT ${name}) + endif() + endif() + endif() + if( ${upperprojectname}_BUILD_TOOLS ) + set_property(GLOBAL APPEND PROPERTY ${upperprojectname}_EXPORTS ${name}) + endif() + set_target_properties(${name} PROPERTIES FOLDER "Tools") + endmacro(add_${projectname}_tool name) + + macro(add_${projectname}_example name) + if( NOT ${upperprojectname}_BUILD_EXAMPLES ) + set(EXCLUDE_FROM_ALL ON) + endif() + add_llvm_executable(${name} ${ARGN}) + if( ${upperprojectname}_BUILD_EXAMPLES ) + install(TARGETS ${name} RUNTIME DESTINATION examples) + endif() + set_target_properties(${name} PROPERTIES FOLDER "Examples") + endmacro(add_${projectname}_example name) + + macro(add_${projectname}_example_library name) + if( NOT ${upperprojectname}_BUILD_EXAMPLES ) + set(EXCLUDE_FROM_ALL ON) + add_llvm_library(${name} BUILDTREE_ONLY ${ARGN}) + else() + add_llvm_library(${name} ${ARGN}) + endif() + + set_target_properties(${name} PROPERTIES FOLDER "Examples") + endmacro(add_${projectname}_example_library name) + + # This is a macro that is used to create targets for executables that are needed + # for development, but that are not intended to be installed by default. + macro(add_${projectname}_utility name) + if ( NOT ${upperprojectname}_BUILD_UTILS ) + set(EXCLUDE_FROM_ALL ON) + endif() + + add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) + set_target_properties(${name} PROPERTIES FOLDER "Utils") + if (NOT ${upperprojectname}_INSTALL_TOOLCHAIN_ONLY) + if (${upperprojectname}_INSTALL_UTILS AND ${upperprojectname}_BUILD_UTILS) + set(export_to_${projectname}exports) + if (${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR + NOT LLVM_DISTRIBUTION_COMPONENTS) + set(export_to_${projectname}exports EXPORT ${upperprojectname}Exports) + set_property(GLOBAL PROPERTY ${upperprojectname}_HAS_EXPORTS True) + endif() + + install(TARGETS ${name} + ${export_to_${projectname}exports} + RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} + COMPONENT ${name}) + + if (NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-${name} + DEPENDS ${name} + COMPONENT ${name}) + endif() + set_property(GLOBAL APPEND PROPERTY ${upperprojectname}_EXPORTS ${name}) + elseif(${upperprojectname}_BUILD_UTILS) + set_property(GLOBAL APPEND PROPERTY ${upperprojectname}_EXPORTS_BUILDTREE_ONLY ${name}) + endif() + endif() + endmacro(add_${projectname}_utility name) +endfunction(add_llvm_project_targets) diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index 50511fd2aef96..ffba3bea224ee 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -21,6 +21,10 @@ set_target_properties(mlir-headers PROPERTIES FOLDER "Misc") add_dependencies(mlir-headers mlir-generic-headers) add_custom_target(mlir-doc) +# Get a bunch of LLVM-style default options. +include(LLVMProjectOptions) +add_llvm_project_options(mlir) + # Build the CUDA conversions and run according tests if the NVPTX backend # is available if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) @@ -44,13 +48,6 @@ set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner" set(MLIR_ROCM_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir ROCm runner") set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner") -option(MLIR_INCLUDE_TESTS - "Generate build targets for the MLIR unit tests." - ${LLVM_INCLUDE_TESTS}) - -option(MLIR_INCLUDE_INTEGRATION_TESTS - "Generate build targets for the MLIR integration tests.") - #------------------------------------------------------------------------------- # Python Bindings Configuration # Requires: @@ -83,42 +80,46 @@ if(MLIR_BINDINGS_PYTHON_ENABLED) "extension = '${PYTHON_MODULE_EXTENSION}") endif() +# Get a bunch of default targets +include(LLVMProjectTargets) +add_llvm_project_targets(mlir) + include_directories( "include") include_directories( ${MLIR_INCLUDE_DIR}) # Adding tools/mlir-tblgen here as calling add_tablegen sets some variables like # MLIR_TABLEGEN_EXE in PARENT_SCOPE which gets lost if that folder is included # from another directory like tools -add_subdirectory(tools/mlir-tblgen) +if (MLIR_INCLUDE_TOOLS) + add_subdirectory(tools/mlir-tblgen) +endif() add_subdirectory(include/mlir) add_subdirectory(lib) # C API needs all dialects for registration, but should be built before tests. add_subdirectory(lib/CAPI) if (MLIR_INCLUDE_TESTS) - add_definitions(-DMLIR_INCLUDE_TESTS) add_subdirectory(unittests) add_subdirectory(test) endif() if (MLIR_INCLUDE_INTEGRATION_TESTS) - add_definitions(-DMLIR_INCLUDE_INTEGRATION_TESTS) add_subdirectory(integration_test) endif() # Tools needs to come late to ensure that MLIR_ALL_LIBS is populated. # Generally things after this point may depend on MLIR_ALL_LIBS or libMLIR.so. -add_subdirectory(tools) +if (MLIR_INCLUDE_TOOLS) + add_subdirectory(tools) +endif() -if( LLVM_INCLUDE_EXAMPLES ) +if (MLIR_INCLUDE_EXAMPLES) add_subdirectory(examples) endif() -option(MLIR_INCLUDE_DOCS "Generate build targets for the MLIR docs." - ${LLVM_INCLUDE_DOCS}) if (MLIR_INCLUDE_DOCS) add_subdirectory(docs) endif() -if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) +if (NOT MLIR_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/mlir include/mlir-c DESTINATION include COMPONENT mlir-headers diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake index 8394c056c1db5..56742db33ee19 100644 --- a/mlir/cmake/modules/AddMLIR.cmake +++ b/mlir/cmake/modules/AddMLIR.cmake @@ -24,7 +24,12 @@ function(add_mlir_interface interface) endfunction() -# Generate Documentation +# Generate Documentation using the mlir-doc rule +# doc_filename: the basename of a .td tablegen file +# command: the tablegen command to run, typically "-gen-op-doc", +# "-gen-pass-doc", or "-gen-dialect-doc" +# output_file: the basename of a .md markdown file to be output +# output_directory: the directory to place the output function(add_mlir_doc doc_filename command output_file output_directory) set(LLVM_TARGET_DEFINITIONS ${doc_filename}.td) tablegen(MLIR ${output_file}.md ${command} "-I${MLIR_MAIN_INCLUDE_DIR}" "-I${MLIR_INCLUDE_DIR}") @@ -40,7 +45,7 @@ function(add_mlir_doc doc_filename command output_file output_directory) endfunction() # Declare an mlir library which can be compiled in libMLIR.so -# In addition to everything that llvm_add_librar accepts, this +# In addition to everything that llvm_add_library accepts, this # also has the following option: # EXCLUDE_FROM_LIBMLIR # Don't include this library in libMLIR.so. This option should be used diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt index 45dc80804aa9a..721efae0388b0 100644 --- a/mlir/examples/standalone/CMakeLists.txt +++ b/mlir/examples/standalone/CMakeLists.txt @@ -31,8 +31,17 @@ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") include(TableGen) include(AddLLVM) include(AddMLIR) + +# Get a bunch of LLVM-style default options. +include(LLVMProjectOptions) +add_llvm_project_options(standalone) + include(HandleLLVMOptions) +# Get a bunch of default targets +include(LLVMProjectTargets) +add_llvm_project_targets(standalone) + include_directories(${LLVM_INCLUDE_DIRS}) include_directories(${MLIR_INCLUDE_DIRS}) include_directories(${PROJECT_SOURCE_DIR}/include) diff --git a/mlir/examples/standalone/standalone-opt/CMakeLists.txt b/mlir/examples/standalone/standalone-opt/CMakeLists.txt index 06bbb4712645a..e4b12e01228a4 100644 --- a/mlir/examples/standalone/standalone-opt/CMakeLists.txt +++ b/mlir/examples/standalone/standalone-opt/CMakeLists.txt @@ -6,7 +6,7 @@ set(LIBS MLIROptLib MLIRStandalone ) -add_llvm_executable(standalone-opt standalone-opt.cpp) +add_standalone_tool(standalone-opt standalone-opt.cpp) llvm_update_compile_flags(standalone-opt) target_link_libraries(standalone-opt PRIVATE ${LIBS}) diff --git a/mlir/examples/standalone/standalone-translate/CMakeLists.txt b/mlir/examples/standalone/standalone-translate/CMakeLists.txt index 137f7947cfac7..15aa237fd18e2 100644 --- a/mlir/examples/standalone/standalone-translate/CMakeLists.txt +++ b/mlir/examples/standalone/standalone-translate/CMakeLists.txt @@ -5,7 +5,7 @@ set(LLVM_LINK_COMPONENTS get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) -add_llvm_executable(standalone-translate +add_standalone_tool(standalone-translate standalone-translate.cpp ) llvm_update_compile_flags(standalone-translate) diff --git a/mlir/examples/toy/CMakeLists.txt b/mlir/examples/toy/CMakeLists.txt index 56002b1ad2e27..39f6bd09a75c7 100644 --- a/mlir/examples/toy/CMakeLists.txt +++ b/mlir/examples/toy/CMakeLists.txt @@ -3,7 +3,7 @@ set_target_properties(Toy PROPERTIES FOLDER Examples) macro(add_toy_chapter name) add_dependencies(Toy ${name}) - add_llvm_example(${name} ${ARGN}) + add_mlir_example(${name} ${ARGN}) endmacro(add_toy_chapter name) add_subdirectory(Ch1) diff --git a/mlir/test/Examples/standalone/test.toy b/mlir/test/Examples/standalone/test.toy index 7b4a9c23906e3..cd183c9f2fd0e 100644 --- a/mlir/test/Examples/standalone/test.toy +++ b/mlir/test/Examples/standalone/test.toy @@ -1,4 +1,5 @@ # RUN: %cmake %mlir_src_root/examples/standalone -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc -DMLIR_DIR=%llvm_lib_dir/cmake/mlir ; %cmake --build . --target check-standalone | tee %t | FileCheck %s +# RUN: %cmake --build . --target mlir-doc # CHECK: Passed: 3 # UNSUPPORTED: windows, android diff --git a/mlir/tools/mlir-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-cpu-runner/CMakeLists.txt index 596012c882280..7cd81128758d7 100644 --- a/mlir/tools/mlir-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cpu-runner/CMakeLists.txt @@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS nativecodegen ) -add_llvm_tool(mlir-cpu-runner +add_mlir_tool(mlir-cpu-runner mlir-cpu-runner.cpp ) llvm_update_compile_flags(mlir-cpu-runner) diff --git a/mlir/tools/mlir-cuda-runner/CMakeLists.txt b/mlir/tools/mlir-cuda-runner/CMakeLists.txt index 5488262d7ee7e..16daca88bc98f 100644 --- a/mlir/tools/mlir-cuda-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cuda-runner/CMakeLists.txt @@ -68,7 +68,7 @@ if(MLIR_CUDA_RUNNER_ENABLED) LIST(APPEND targets_to_link "LLVM${t}") ENDFOREACH(t) - add_llvm_tool(mlir-cuda-runner + add_mlir_tool(mlir-cuda-runner mlir-cuda-runner.cpp DEPENDS diff --git a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt index bc9a0c1f310a1..c27857b3b7ca6 100644 --- a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt +++ b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt @@ -2,7 +2,7 @@ set(LLVM_LINK_COMPONENTS Core Support ) -add_llvm_tool(mlir-linalg-ods-gen +add_mlir_tool(mlir-linalg-ods-gen mlir-linalg-ods-gen.cpp ) llvm_update_compile_flags(mlir-linalg-ods-gen) diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index 483dcfec0c0ff..65a328fa141e9 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -50,7 +50,7 @@ add_mlir_library(MLIRMlirOptMain ${LIBS} ) -add_llvm_tool(mlir-opt +add_mlir_tool(mlir-opt mlir-opt.cpp DEPENDS diff --git a/mlir/tools/mlir-reduce/CMakeLists.txt b/mlir/tools/mlir-reduce/CMakeLists.txt index 958c2c94cc684..8e4a42f5882bd 100644 --- a/mlir/tools/mlir-reduce/CMakeLists.txt +++ b/mlir/tools/mlir-reduce/CMakeLists.txt @@ -43,7 +43,7 @@ set(LIBS MLIRTransformUtils ) -add_llvm_tool(mlir-reduce +add_mlir_tool(mlir-reduce OptReductionPass.cpp Passes/OpReducer.cpp ReductionNode.cpp diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt index 2c0791d7a5c1d..3c90beac0b57e 100644 --- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt +++ b/mlir/tools/mlir-rocm-runner/CMakeLists.txt @@ -104,7 +104,7 @@ if(MLIR_ROCM_RUNNER_ENABLED) LIST(APPEND targets_to_link "LLVM${t}") ENDFOREACH(t) - add_llvm_tool(mlir-rocm-runner + add_mlir_tool(mlir-rocm-runner mlir-rocm-runner.cpp DEPENDS diff --git a/mlir/tools/mlir-translate/CMakeLists.txt b/mlir/tools/mlir-translate/CMakeLists.txt index 99b98f9288b92..cc7ff64da42e7 100644 --- a/mlir/tools/mlir-translate/CMakeLists.txt +++ b/mlir/tools/mlir-translate/CMakeLists.txt @@ -5,7 +5,7 @@ set(LLVM_LINK_COMPONENTS get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) -add_llvm_tool(mlir-translate +add_mlir_tool(mlir-translate mlir-translate.cpp ) llvm_update_compile_flags(mlir-translate) diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt index c7a03259bb839..c11b4ef7c9f26 100644 --- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt +++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt @@ -85,7 +85,7 @@ if (MLIR_VULKAN_RUNNER_ENABLED) LIST(APPEND targets_to_link "LLVM${t}") ENDFOREACH(t) - add_llvm_tool(mlir-vulkan-runner + add_mlir_tool(mlir-vulkan-runner mlir-vulkan-runner.cpp ) add_dependencies(mlir-vulkan-runner vulkan-runtime-wrappers) From 7f3da488852b157e456333cc40048526409d4592 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Oct 2020 17:12:23 -0700 Subject: [PATCH 486/544] [X86] Remove X86ISD::MWAITX_DAG. Just match the intrinsic to the custom inserter pseudo instruction during isel. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 15 --------------- llvm/lib/Target/X86/X86ISelLowering.h | 3 --- llvm/lib/Target/X86/X86InstrCompiler.td | 2 +- llvm/lib/Target/X86/X86InstrInfo.td | 10 +--------- .../tools/llvm-mca/X86/Generic/resources-mwaitx.s | 2 +- .../tools/llvm-mca/X86/Znver1/resources-mwaitx.s | 2 +- .../tools/llvm-mca/X86/Znver2/resources-mwaitx.s | 2 +- 7 files changed, 5 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e526a1dd58eb9..a49847d9ed294 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25952,20 +25952,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, Operation.getValue(1)); } - case Intrinsic::x86_mwaitx: { - // If the current function needs the base pointer, RBX, - // we shouldn't use mwaitx directly. - // Indeed the lowering of that instruction will clobber - // that register and since RBX will be a reserved register - // the register allocator will not make sure its value will - // be properly saved and restored around this live-range. - SDLoc dl(Op); - unsigned Opcode = X86ISD::MWAITX_DAG; - SDValue Chain = DAG.getNode(Opcode, dl, MVT::Other, - {Op->getOperand(0), Op->getOperand(2), - Op->getOperand(3), Op->getOperand(4)}); - return Chain; - } case Intrinsic::x86_encodekey128: case Intrinsic::x86_encodekey256: { SDLoc DL(Op); @@ -30848,7 +30834,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(LCMPXCHG16_DAG) NODE_NAME_CASE(LCMPXCHG8_SAVE_EBX_DAG) NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) - NODE_NAME_CASE(MWAITX_DAG) NODE_NAME_CASE(LADD) NODE_NAME_CASE(LSUB) NODE_NAME_CASE(LOR) diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 9f231be78191f..00ead0c09cce8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -710,9 +710,6 @@ namespace llvm { // For avx512-vp2intersect VP2INTERSECT, - // Mwaitx builtin is lowered to this if the base pointer needs saving. - MWAITX_DAG, - // Key locker nodes that produce flags. AESENC128KL, AESDEC128KL, diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 4f81c271386c5..9e6a5fb934de4 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -930,7 +930,7 @@ def MWAITX : I<0, Pseudo, (outs), (ins GR32:$ecx, GR32:$eax, GR32:$ebx), "mwaitx", - [(X86mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>; + [(int_x86_mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 3a3d141854a1c..47de7d6098551 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -77,9 +77,6 @@ def SDTX86caspairSaveRbx16 : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, SDTCisPtrTy<1>, SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; -def SDTX86mwaitx : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; - def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisInt<2>]>; @@ -192,10 +189,6 @@ def X86cas16save_rbx : SDNode<"X86ISD::LCMPXCHG16_SAVE_RBX_DAG", [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86mwaitx : SDNode<"X86ISD::MWAITX_DAG", SDTX86mwaitx, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, - SDNPMayLoad]>; - def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, @@ -2811,8 +2804,7 @@ let SchedRW = [ WriteSystem ] in { let Uses = [ ECX, EAX, EBX ] in { def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", - [(int_x86_mwaitx ECX, EAX, EBX)]>, - TB, Requires<[ HasMWAITX ]>; + []>, TB, Requires<[ HasMWAITX ]>; } } // SchedRW diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-mwaitx.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-mwaitx.s index 517b283dafaca..5f8125f3b1923 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-mwaitx.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-mwaitx.s @@ -14,7 +14,7 @@ mwaitx # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.33 U monitorx -# CHECK-NEXT: 1 100 0.33 * * U mwaitx +# CHECK-NEXT: 1 100 0.33 U mwaitx # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-mwaitx.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-mwaitx.s index c296b21d75418..4aafec738b597 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-mwaitx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-mwaitx.s @@ -14,7 +14,7 @@ mwaitx # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U monitorx -# CHECK-NEXT: 1 100 0.25 * * U mwaitx +# CHECK-NEXT: 1 100 0.25 U mwaitx # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-mwaitx.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-mwaitx.s index 6c2b8153c26ac..d33ccce033914 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-mwaitx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-mwaitx.s @@ -14,7 +14,7 @@ mwaitx # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 100 0.25 U monitorx -# CHECK-NEXT: 1 100 0.25 * * U mwaitx +# CHECK-NEXT: 1 100 0.25 U mwaitx # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 From 39fc4a0b0af69772ee360b5f729b1ec453217793 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Oct 2020 17:47:52 -0700 Subject: [PATCH 487/544] [X86] Move ENCODEKEY128/256 handling from lowering to selection. We should avoid emitting MachineSDNodes from lowering. We can use the the implicit def handling in InstrEmitter to avoid manually copying from each xmm result register. We only need to manually emit the copies for the implicit uses. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 40 ++++++++++++++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 61 ------------------------- 2 files changed, 36 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 0d80bde5f7173..356a3f7228dea 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4488,6 +4488,38 @@ void X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = Node->getConstantOperandVal(1); + switch (IntNo) { + default: break; + case Intrinsic::x86_encodekey128: + case Intrinsic::x86_encodekey256: { + if (!Subtarget->hasKL()) + break; + + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break; + case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break; + } + + SDValue Chain = Node->getOperand(0); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3), + SDValue()); + if (Opcode == X86::ENCODEKEY256) + Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4), + Chain.getValue(1)); + + MachineSDNode *Res = CurDAG->getMachineNode( + Opcode, dl, Node->getVTList(), + {Node->getOperand(2), Chain, Chain.getValue(1)}); + ReplaceNode(Node, Res); + return; + } + } + break; + } case ISD::INTRINSIC_VOID: { unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { @@ -5737,6 +5769,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::AESDECWIDE128KL: case X86ISD::AESENCWIDE256KL: case X86ISD::AESDECWIDE256KL: { + if (!Subtarget->hasWIDEKL()) + break; + unsigned Opcode; switch (Node->getOpcode()) { default: @@ -5779,11 +5814,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) { Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), Chain.getValue(1)); - SDVTList VTs = CurDAG->getVTList( - {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, - MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other}); SDNode *Res = CurDAG->getMachineNode( - Opcode, dl, VTs, + Opcode, dl, Node->getVTList(), {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); ReplaceNode(Node, Res); return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a49847d9ed294..72cddbb1f83ea 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25952,67 +25952,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, Operation.getValue(1)); } - case Intrinsic::x86_encodekey128: - case Intrinsic::x86_encodekey256: { - SDLoc DL(Op); - SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue); - SDValue Chain = Op.getOperand(0); - bool IsEK256 = false; - Chain = DAG.getCopyToReg(Chain, DL, X86::XMM0, Op->getOperand(3), - SDValue()); - - unsigned Opcode; - - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); - case Intrinsic::x86_encodekey128: - Opcode = X86::ENCODEKEY128; - break; - case Intrinsic::x86_encodekey256: - Opcode = X86::ENCODEKEY256; - Chain = DAG.getCopyToReg(Chain, DL, X86::XMM1, Op->getOperand(4), - Chain.getValue(1)); - IsEK256 = true; - break; - } - - SDNode *Res = DAG.getMachineNode(Opcode, DL, VTs, - {Op.getOperand(2), Chain, - Chain.getValue(1)}); - - Chain = SDValue(Res, 1); - - SDValue XMM0 = DAG.getCopyFromReg(Chain, DL, X86::XMM0, MVT::v16i8, - SDValue(Res, 2)); - SDValue XMM1 = DAG.getCopyFromReg(XMM0.getValue(1), DL, X86::XMM1, - MVT::v16i8, XMM0.getValue(2)); - SDValue XMM2 = DAG.getCopyFromReg(XMM1.getValue(1), DL, X86::XMM2, - MVT::v16i8, XMM1.getValue(2)); - SDValue XMM3, XMM4; - if (IsEK256) { - XMM3 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM3, - MVT::v16i8, XMM2.getValue(2)); - XMM4 = DAG.getCopyFromReg(XMM3.getValue(1), DL, X86::XMM4, - MVT::v16i8, XMM3.getValue(2)); - } else { - XMM4 = DAG.getCopyFromReg(XMM2.getValue(1), DL, X86::XMM4, - MVT::v16i8, XMM2.getValue(2)); - } - SDValue XMM5 = DAG.getCopyFromReg(XMM4.getValue(1), DL, X86::XMM5, - MVT::v16i8, XMM4.getValue(2)); - SDValue XMM6 = DAG.getCopyFromReg(XMM5.getValue(1), DL, X86::XMM6, - MVT::v16i8, XMM5.getValue(2)); - - if (IsEK256) { - return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), - {SDValue(Res, 0), - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, Chain}); - } else { - return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), - {SDValue(Res, 0), - XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, Chain}); - } - } case Intrinsic::x86_aesenc128kl: case Intrinsic::x86_aesdec128kl: case Intrinsic::x86_aesenc256kl: From a7e45ea30d4c9c3f66f44f0e69e31eac3a22db42 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Oct 2020 21:42:06 -0700 Subject: [PATCH 488/544] [X86] Add memory operand to AESENC/AESDEC Key Locker instructions. This removes FIXMEs from selectAddr. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 11 +--- llvm/lib/Target/X86/X86ISelLowering.cpp | 58 ++++++++++++++++--- llvm/lib/Target/X86/X86ISelLowering.h | 20 +++---- llvm/lib/Target/X86/X86InstrInfo.td | 12 ++-- llvm/lib/Target/X86/X86InstrKL.td | 3 +- llvm/test/CodeGen/X86/keylocker-intrinsics.ll | 8 +-- 6 files changed, 76 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 356a3f7228dea..3791ed5303a61 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2448,14 +2448,6 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, Parent->getOpcode() != X86ISD::TLSCALL && // Fixme Parent->getOpcode() != X86ISD::ENQCMD && // Fixme Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme - Parent->getOpcode() != X86ISD::AESENC128KL && // Fixme - Parent->getOpcode() != X86ISD::AESDEC128KL && // Fixme - Parent->getOpcode() != X86ISD::AESENC256KL && // Fixme - Parent->getOpcode() != X86ISD::AESDEC256KL && // Fixme - Parent->getOpcode() != X86ISD::AESENCWIDE128KL && // Fixme - Parent->getOpcode() != X86ISD::AESDECWIDE128KL && // Fixme - Parent->getOpcode() != X86ISD::AESENCWIDE256KL && // Fixme - Parent->getOpcode() != X86ISD::AESDECWIDE256KL && // Fixme Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = @@ -5814,9 +5806,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), Chain.getValue(1)); - SDNode *Res = CurDAG->getMachineNode( + MachineSDNode *Res = CurDAG->getMachineNode( Opcode, dl, Node->getVTList(), {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); + CurDAG->setNodeMemRefs(Res, cast(Node)->getMemOperand()); ReplaceNode(Node, Res); return; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 72cddbb1f83ea..13de7bb75b8ab 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5022,13 +5022,47 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { + Info.flags = MachineMemOperand::MONone; + Info.offset = 0; const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); - if (!IntrData) + if (!IntrData) { + switch (Intrinsic) { + case Intrinsic::x86_aesenc128kl: + case Intrinsic::x86_aesdec128kl: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(1); + Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48); + Info.align = Align(1); + Info.flags |= MachineMemOperand::MOLoad; + return true; + case Intrinsic::x86_aesenc256kl: + case Intrinsic::x86_aesdec256kl: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(1); + Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64); + Info.align = Align(1); + Info.flags |= MachineMemOperand::MOLoad; + return true; + case Intrinsic::x86_aesencwide128kl: + case Intrinsic::x86_aesdecwide128kl: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(0); + Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48); + Info.align = Align(1); + Info.flags |= MachineMemOperand::MOLoad; + return true; + case Intrinsic::x86_aesencwide256kl: + case Intrinsic::x86_aesdecwide256kl: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(0); + Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64); + Info.align = Align(1); + Info.flags |= MachineMemOperand::MOLoad; + return true; + } return false; - - Info.flags = MachineMemOperand::MONone; - Info.offset = 0; + } switch (IntrData->Type) { case TRUNCATE_TO_MEM_VI8: @@ -25977,8 +26011,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, break; } - SDValue Operation = DAG.getNode(Opcode, DL, VTs, Chain, Op.getOperand(2), - Op.getOperand(3)); + MemIntrinsicSDNode *MemIntr = cast(Op); + MachineMemOperand *MMO = MemIntr->getMemOperand(); + EVT MemVT = MemIntr->getMemoryVT(); + SDValue Operation = DAG.getMemIntrinsicNode( + Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT, + MMO); SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG); return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), @@ -26011,11 +26049,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, break; } - SDValue Operation = DAG.getNode( + MemIntrinsicSDNode *MemIntr = cast(Op); + MachineMemOperand *MMO = MemIntr->getMemOperand(); + EVT MemVT = MemIntr->getMemoryVT(); + SDValue Operation = DAG.getMemIntrinsicNode( Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), Op.getOperand(5), Op.getOperand(6), Op.getOperand(7), - Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)}); + Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)}, + MemVT, MMO); SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG); return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 00ead0c09cce8..f9bf6fb988ebe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -710,16 +710,6 @@ namespace llvm { // For avx512-vp2intersect VP2INTERSECT, - // Key locker nodes that produce flags. - AESENC128KL, - AESDEC128KL, - AESENC256KL, - AESDEC256KL, - AESENCWIDE128KL, - AESDECWIDE128KL, - AESENCWIDE256KL, - AESDECWIDE256KL, - /// X86 strict FP compare instructions. STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCMPS, @@ -836,6 +826,16 @@ namespace llvm { MGATHER, MSCATTER, + // Key locker nodes that produce flags. + AESENC128KL, + AESDEC128KL, + AESENC256KL, + AESDEC256KL, + AESENCWIDE128KL, + AESDECWIDE128KL, + AESENCWIDE256KL, + AESDECWIDE256KL, + // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all // opcodes will be thought as target memory ops! diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 47de7d6098551..0ac8cb9c27764 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -330,13 +330,17 @@ def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD, [SDNPHasChain, SDNPSideEffect]>; def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL, - [SDNPHasChain, SDNPSideEffect]>; + [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, + SDNPMemOperand]>; def X86aesdec128kl : SDNode<"X86ISD::AESDEC128KL", SDT_X86AESENCDECKL, - [SDNPHasChain, SDNPSideEffect]>; + [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, + SDNPMemOperand]>; def X86aesenc256kl : SDNode<"X86ISD::AESENC256KL", SDT_X86AESENCDECKL, - [SDNPHasChain, SDNPSideEffect]>; + [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, + SDNPMemOperand]>; def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL, - [SDNPHasChain, SDNPSideEffect]>; + [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, + SDNPMemOperand]>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td index 0c05c7a0ab2cd..d17a1e9f93dcb 100644 --- a/llvm/lib/Target/X86/X86InstrKL.td +++ b/llvm/lib/Target/X86/X86InstrKL.td @@ -60,7 +60,8 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { let SchedRW = [WriteSystem], Predicates = [HasWIDEKL] in { let Uses = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7], - Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in { + Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7], + mayLoad = 1 in { def AESENCWIDE128KL : I<0xD8, MRM0m, (outs), (ins opaquemem:$src), "aesencwide128kl\t$src", []>, T8XS; def AESDECWIDE128KL : I<0xD8, MRM1m, (outs), (ins opaquemem:$src), diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll index e48affb80d5f1..584391f2eafdb 100644 --- a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unkown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=i386-unkown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-unkown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=i386-unkown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unkown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unkown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unkown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unkown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 declare void @llvm.x86.loadiwkey(i32, <2 x i64>, <2 x i64>, <2 x i64>) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) From ae2e51597f6e9478b7ccbdf1cf633540d7d1e8b3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 3 Oct 2020 21:54:59 -0700 Subject: [PATCH 489/544] [X86] LOADIWKEY, ENCODEKEY128 and ENCODEKEY256 clobber EFLAGS. --- llvm/lib/Target/X86/X86InstrKL.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td index d17a1e9f93dcb..aa7df4256cec5 100644 --- a/llvm/lib/Target/X86/X86InstrKL.td +++ b/llvm/lib/Target/X86/X86InstrKL.td @@ -17,18 +17,18 @@ // Key Locker instructions let SchedRW = [WriteSystem], Predicates = [HasKL] in { - let Uses = [XMM0, EAX] in { + let Uses = [XMM0, EAX], Defs = [EFLAGS] in { def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "loadiwkey\t{$src2, $src1|$src1, $src2}", [(int_x86_loadiwkey EAX, XMM0, VR128:$src1, VR128:$src2)]>, T8XS; } - let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6] in { + let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in { def ENCODEKEY128 : I<0xFA, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS; } - let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6] in { + let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, EFLAGS] in { def ENCODEKEY256 : I<0xFB, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS; } From 0f08a1a5b162dcd2caf1b76827b917ca69e3e48d Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Sat, 3 Oct 2020 22:36:28 -0700 Subject: [PATCH 490/544] [lldb] [test/Register] Mark new FP reg tests XFAIL on Darwin This is failing on GreenDragon: http://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/24066/ --- lldb/test/Shell/Register/x86-64-fp-write.test | 1 + lldb/test/Shell/Register/x86-fp-read.test | 1 + 2 files changed, 2 insertions(+) diff --git a/lldb/test/Shell/Register/x86-64-fp-write.test b/lldb/test/Shell/Register/x86-64-fp-write.test index 38c8942091185..6f8047f94360e 100644 --- a/lldb/test/Shell/Register/x86-64-fp-write.test +++ b/lldb/test/Shell/Register/x86-64-fp-write.test @@ -1,4 +1,5 @@ # XFAIL: system-windows +# XFAIL: system-darwin # REQUIRES: native && target-x86_64 # RUN: %clangxx_host %p/Inputs/x86-fp-write.cpp -o %t # RUN: %lldb -b -s %s %t | FileCheck %s diff --git a/lldb/test/Shell/Register/x86-fp-read.test b/lldb/test/Shell/Register/x86-fp-read.test index 42c85baa22953..eac942f5989cf 100644 --- a/lldb/test/Shell/Register/x86-fp-read.test +++ b/lldb/test/Shell/Register/x86-fp-read.test @@ -1,4 +1,5 @@ # XFAIL: system-windows +# XFAIL: system-darwin # REQUIRES: native && (target-x86 || target-x86_64) # RUN: %clangxx_host -g %p/Inputs/x86-fp-read.cpp -o %t # RUN: %lldb -b -s %s %t | FileCheck %s From 51beb0c80dc45d45efada5cd6e3a13c9fef0805c Mon Sep 17 00:00:00 2001 From: Alexey Baturo Date: Sun, 4 Oct 2020 10:31:30 +0300 Subject: [PATCH 491/544] [RISCV][ASAN] unwind fixup [8/11] patch series to port ASAN for riscv64 Depends On D87577 Reviewed By: eugenis, vitalybuka Differential Revision: https://reviews.llvm.org/D87579 --- .../lib/sanitizer_common/sanitizer_stacktrace.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp index ca2f90a51c9e8..e51609f54a0af 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp @@ -116,6 +116,9 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top, uhwptr pc1 = caller_frame[2]; #elif defined(__s390__) uhwptr pc1 = frame[14]; +#elif defined(__riscv) + // frame[-1] contains the return address + uhwptr pc1 = frame[-1]; #else uhwptr pc1 = frame[1]; #endif @@ -128,7 +131,13 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top, trace_buffer[size++] = (uptr) pc1; } bottom = (uptr)frame; - frame = GetCanonicFrame((uptr)frame[0], stack_top, bottom); +#if defined(__riscv) + // frame[-2] contain fp of the previous frame + uptr new_bp = (uptr)frame[-2]; +#else + uptr new_bp = (uptr)frame[0]; +#endif + frame = GetCanonicFrame(new_bp, stack_top, bottom); } } From 03bd5198b6f7d9f49d72e6516d813a206f3b6d0d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 4 Oct 2020 11:27:07 +0300 Subject: [PATCH 492/544] [OldPM] Pass manager: run SROA after (simple) loop unrolling I have stumbled into this pretty accidentally, when rewriting some spaghetti-like code into something more structured, which involved using some `std::array<>`s. And to my surprise, the `alloca`s remained, causing about `+160%` perf regression. https://llvm-compile-time-tracker.com/compare.php?from=bb6f4d32aac3eecb51909f4facc625219307ee68&to=d563e66f40f9d4d145cb2050e41cb961e2b37785&stat=instructions suggests that this has geomean compile-time cost of `+0.08%`. Note that D68593 / cecc0d27ad58c0aed8ef9ed99bbf691e137a0f26 already did this chage for NewPM, but left OldPM in a pessimized state. This fixes [[ https://bugs.llvm.org/show_bug.cgi?id=40011 | PR40011 ]], [[ https://bugs.llvm.org/show_bug.cgi?id=42794 | PR42794 ]] and probably some other reports. Reviewed By: nikic, xbolva00 Differential Revision: https://reviews.llvm.org/D87972 --- clang/test/CodeGenCXX/union-tbaa2.cpp | 2 +- clang/test/Misc/loop-opt-setup.c | 28 +++----- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 --- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 3 + llvm/test/Other/opt-O2-pipeline.ll | 6 +- .../Other/opt-O3-pipeline-enable-matrix.ll | 2 + llvm/test/Other/opt-O3-pipeline.ll | 2 + llvm/test/Other/opt-Os-pipeline.ll | 2 + .../X86/SROA-after-loop-unrolling.ll | 64 +++++-------------- 9 files changed, 37 insertions(+), 80 deletions(-) diff --git a/clang/test/CodeGenCXX/union-tbaa2.cpp b/clang/test/CodeGenCXX/union-tbaa2.cpp index 5d13ff1ad8d9a..65872d4a98ae3 100644 --- a/clang/test/CodeGenCXX/union-tbaa2.cpp +++ b/clang/test/CodeGenCXX/union-tbaa2.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -O2 -fno-experimental-new-pass-manager -std=c++11 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -target-feature +sse4.2 -target-feature +avx -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -target-feature +sse4.2 -target-feature +avx -emit-llvm -o - | FileCheck %s // Testcase from llvm.org/PR32056 diff --git a/clang/test/Misc/loop-opt-setup.c b/clang/test/Misc/loop-opt-setup.c index 868c716c6ed74..322f5e0e6d4aa 100644 --- a/clang/test/Misc/loop-opt-setup.c +++ b/clang/test/Misc/loop-opt-setup.c @@ -1,5 +1,5 @@ -// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-NEWPM -// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-OLDPM +// RUN: %clang -O1 -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s + extern int a[16]; int b = 0; int foo(void) { @@ -9,10 +9,8 @@ int foo(void) { return b; } // Check br i1 to make sure that the loop is fully unrolled -// CHECK-LABEL-NEWPM: foo -// CHECK-NOT-NEWPM: br i1 -// CHECK-LABEL-OLDPM: foo -// CHECK-NOT-OLDPM: br i1 +// CHECK-LABEL: foo +// CHECK-NOT: br i1 void Helper() { const int *nodes[5]; @@ -26,17 +24,7 @@ void Helper() { } // Check br i1 to make sure the loop is gone, there will still be a label branch for the infinite loop. -// CHECK-LABEL-NEWPM: Helper -// CHECK-NEWPM: br label -// CHECK-NEWPM-NOT: br i1 -// CHECK-NEWPM: br label - -// The old pass manager doesn't remove the while loop so check for 5 load i32*. -// CHECK-LABEL-OLDPM: Helper -// CHECK-OLDPM: br label -// CHECK-OLDPM: load i32* -// CHECK-OLDPM: load i32* -// CHECK-OLDPM: load i32* -// CHECK-OLDPM: load i32* -// CHECK-OLDPM: load i32* -// CHECK-OLDPM: ret +// CHECK-LABEL: Helper +// CHECK: br label +// CHECK-NOT: br i1 +// CHECK: br label diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ccc493640b292..043effc97f2be 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -479,14 +479,6 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { if (EnableOpt) PM.add(createAMDGPUPromoteAllocaToVector()); }); - - Builder.addExtension( - PassManagerBuilder::EP_LoopOptimizerEnd, - [](const PassManagerBuilder &, legacy::PassManagerBase &PM) { - // Add SROA after loop unrolling as more promotable patterns are - // exposed after small loops are fully unrolled. - PM.add(createSROAPass()); - }); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index c63705a4ee947..088f1e25f3d15 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -459,6 +459,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses( addExtensionsToPM(EP_LoopOptimizerEnd, MPM); // This ends the loop pass pipelines. + // Break up allocas that may now be splittable after loop unrolling. + MPM.add(createSROAPass()); + if (OptLevel > 1) { MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds MPM.add(NewGVN ? createNewGVNPass() diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 58ed6b2a0820a..967477da22bd6 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts @@ -22,7 +22,7 @@ ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: Target Transform Information ; Target Pass Configuration -; CHECK: Type-Based Alias Analysis +; CHECK: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info @@ -134,6 +134,8 @@ ; CHECK-NEXT: Recognize loop idioms ; CHECK-NEXT: Delete dead loops ; CHECK-NEXT: Unroll loops +; CHECK-NEXT: SROA +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: MergedLoadStoreMotion ; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index 493957e865d4f..3b8db87e8fb17 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -139,6 +139,8 @@ ; CHECK-NEXT: Recognize loop idioms ; CHECK-NEXT: Delete dead loops ; CHECK-NEXT: Unroll loops +; CHECK-NEXT: SROA +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: MergedLoadStoreMotion ; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index f674dabd52173..a53db61a93cf8 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -139,6 +139,8 @@ ; CHECK-NEXT: Recognize loop idioms ; CHECK-NEXT: Delete dead loops ; CHECK-NEXT: Unroll loops +; CHECK-NEXT: SROA +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: MergedLoadStoreMotion ; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index 66df666a64c69..93c2d121255bc 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -120,6 +120,8 @@ ; CHECK-NEXT: Recognize loop idioms ; CHECK-NEXT: Delete dead loops ; CHECK-NEXT: Unroll loops +; CHECK-NEXT: SROA +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: MergedLoadStoreMotion ; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Function Alias Analysis Results diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll index 8c8a80cbf7ffa..22694901162c1 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll @@ -22,55 +22,21 @@ target triple = "x86_64-unknown-linux-gnu" %"struct.std::array" = type { [6 x i32] } define dso_local void @_Z3fooi(i32 %cnt) { -; OLDPM-LABEL: @_Z3fooi( -; OLDPM-NEXT: entry: -; OLDPM-NEXT: [[ARR:%.*]] = alloca %"struct.std::array", align 16 -; OLDPM-NEXT: [[TMP0:%.*]] = bitcast %"struct.std::array"* [[ARR]] to i8* -; OLDPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[TMP0]]) -; OLDPM-NEXT: [[ARRAYDECAY_I_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 0 -; OLDPM-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 1 -; OLDPM-NEXT: [[INCDEC_PTR_1:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 2 -; OLDPM-NEXT: [[INCDEC_PTR_2:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 3 -; OLDPM-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[CNT:%.*]], i32 0 -; OLDPM-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer -; OLDPM-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], -; OLDPM-NEXT: [[TMP4:%.*]] = bitcast %"struct.std::array"* [[ARR]] to <4 x i32>* -; OLDPM-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16 -; OLDPM-NEXT: [[INCDEC_PTR_3:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 4 -; OLDPM-NEXT: [[INC_4:%.*]] = add nsw i32 [[CNT]], 5 -; OLDPM-NEXT: store i32 [[INC_4]], i32* [[INCDEC_PTR_3]], align 16 -; OLDPM-NEXT: [[INCDEC_PTR_4:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 5 -; OLDPM-NEXT: [[INC_5:%.*]] = add nsw i32 [[CNT]], 6 -; OLDPM-NEXT: store i32 [[INC_5]], i32* [[INCDEC_PTR_4]], align 4 -; OLDPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYDECAY_I_I_I]], align 16 -; OLDPM-NEXT: call void @_Z3usei(i32 [[TMP5]]) -; OLDPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 -; OLDPM-NEXT: call void @_Z3usei(i32 [[TMP6]]) -; OLDPM-NEXT: [[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 8 -; OLDPM-NEXT: call void @_Z3usei(i32 [[TMP7]]) -; OLDPM-NEXT: [[TMP8:%.*]] = load i32, i32* [[INCDEC_PTR_2]], align 4 -; OLDPM-NEXT: call void @_Z3usei(i32 [[TMP8]]) -; OLDPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[INCDEC_PTR_3]], align 16 -; OLDPM-NEXT: call void @_Z3usei(i32 [[TMP9]]) -; OLDPM-NEXT: call void @_Z3usei(i32 [[INC_5]]) -; OLDPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]]) -; OLDPM-NEXT: ret void -; -; NEWPM-LABEL: @_Z3fooi( -; NEWPM-NEXT: entry: -; NEWPM-NEXT: [[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1 -; NEWPM-NEXT: [[INC_1:%.*]] = add nsw i32 [[CNT]], 2 -; NEWPM-NEXT: [[INC_2:%.*]] = add nsw i32 [[CNT]], 3 -; NEWPM-NEXT: [[INC_3:%.*]] = add nsw i32 [[CNT]], 4 -; NEWPM-NEXT: [[INC_4:%.*]] = add nsw i32 [[CNT]], 5 -; NEWPM-NEXT: [[INC_5:%.*]] = add nsw i32 [[CNT]], 6 -; NEWPM-NEXT: call void @_Z3usei(i32 [[INC]]) -; NEWPM-NEXT: call void @_Z3usei(i32 [[INC_1]]) -; NEWPM-NEXT: call void @_Z3usei(i32 [[INC_2]]) -; NEWPM-NEXT: call void @_Z3usei(i32 [[INC_3]]) -; NEWPM-NEXT: call void @_Z3usei(i32 [[INC_4]]) -; NEWPM-NEXT: call void @_Z3usei(i32 [[INC_5]]) -; NEWPM-NEXT: ret void +; CHECK-LABEL: @_Z3fooi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1 +; CHECK-NEXT: [[INC_1:%.*]] = add nsw i32 [[CNT]], 2 +; CHECK-NEXT: [[INC_2:%.*]] = add nsw i32 [[CNT]], 3 +; CHECK-NEXT: [[INC_3:%.*]] = add nsw i32 [[CNT]], 4 +; CHECK-NEXT: [[INC_4:%.*]] = add nsw i32 [[CNT]], 5 +; CHECK-NEXT: [[INC_5:%.*]] = add nsw i32 [[CNT]], 6 +; CHECK-NEXT: call void @_Z3usei(i32 [[INC]]) +; CHECK-NEXT: call void @_Z3usei(i32 [[INC_1]]) +; CHECK-NEXT: call void @_Z3usei(i32 [[INC_2]]) +; CHECK-NEXT: call void @_Z3usei(i32 [[INC_3]]) +; CHECK-NEXT: call void @_Z3usei(i32 [[INC_4]]) +; CHECK-NEXT: call void @_Z3usei(i32 [[INC_5]]) +; CHECK-NEXT: ret void ; entry: %cnt.addr = alloca i32 From 7ce4dfb4dd2ca431f17b62ef563cc3b2737c677e Mon Sep 17 00:00:00 2001 From: Alexey Baturo Date: Sun, 4 Oct 2020 12:38:06 +0300 Subject: [PATCH 493/544] [RISCV][ASAN] support code for architecture-specific parts of asan [9/11] patch series to port ASAN for riscv64 Depends On D87579 Reviewed By: luismarques Differential Revision: https://reviews.llvm.org/D87580 --- compiler-rt/lib/asan/asan_allocator.h | 7 ++++++- compiler-rt/lib/asan/asan_mapping.h | 17 +++++++++++++++ compiler-rt/lib/asan/asan_shadow_setup.cpp | 3 ++- .../lib/sanitizer_common/sanitizer_common.h | 5 ++++- .../sanitizer_common_syscalls.inc | 14 +++++++------ .../lib/sanitizer_common/sanitizer_linux.cpp | 8 +++++-- .../sanitizer_linux_libcdep.cpp | 21 ++++++++++++++++--- .../lib/sanitizer_common/sanitizer_platform.h | 2 ++ .../sanitizer_platform_interceptors.h | 9 ++++---- .../sanitizer_platform_limits_posix.cpp | 17 +++++++++------ .../sanitizer_platform_limits_posix.h | 4 ++-- .../sanitizer_common/sanitizer_stacktrace.cpp | 4 +++- .../sanitizer_common/sanitizer_stacktrace.h | 1 + .../sanitizer_stoptheworld_linux_libcdep.cpp | 16 +++++++++----- .../sanitizer_symbolizer_libcdep.cpp | 3 +++ 15 files changed, 99 insertions(+), 32 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index 612799f90964a..2963e979b55c0 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -15,10 +15,11 @@ #define ASAN_ALLOCATOR_H #include "asan_flags.h" -#include "asan_internal.h" #include "asan_interceptors.h" +#include "asan_internal.h" #include "sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_list.h" +#include "sanitizer_common/sanitizer_platform.h" namespace __asan { @@ -132,6 +133,10 @@ typedef DefaultSizeClassMap SizeClassMap; const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x2000000000ULL; // 128G. typedef VeryCompactSizeClassMap SizeClassMap; +#elif SANITIZER_RISCV64 +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x2000000000ULL; // 128G. +typedef VeryDenseSizeClassMap SizeClassMap; # elif defined(__aarch64__) // AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA // so no need to different values for different VMA. diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h index 27598171fc29b..f239c3ee2ff92 100644 --- a/compiler-rt/lib/asan/asan_mapping.h +++ b/compiler-rt/lib/asan/asan_mapping.h @@ -79,6 +79,20 @@ // || `[0x1000000000, 0x11ffffffff]` || lowshadow || // || `[0x0000000000, 0x0fffffffff]` || lowmem || // +// RISC-V has only 38 bits for task size +// Low mem size is set with kRiscv64_ShadowOffset64 in +// compiler-rt/lib/asan/asan_allocator.h and in +// llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp with +// kRiscv64_ShadowOffset64, High mem top border is set with +// GetMaxVirtualAddress() in +// compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +// Default Linux/RISCV64 Sv39/Sv48 mapping: +// || `[0x000820000000, 0x003fffffffff]` || HighMem || +// || `[0x000124000000, 0x00081fffffff]` || HighShadow || +// || `[0x000024000000, 0x000123ffffff]` || ShadowGap || +// || `[0x000020000000, 0x000023ffffff]` || LowShadow || +// || `[0x000000000000, 0x00001fffffff]` || LowMem || +// // Default Linux/AArch64 (42-bit VMA) mapping: // || `[0x10000000000, 0x3ffffffffff]` || highmem || // || `[0x0a000000000, 0x0ffffffffff]` || highshadow || @@ -161,6 +175,7 @@ static const u64 kDefaultShadowOffset64 = 1ULL << 44; static const u64 kDefaultShort64bitShadowOffset = 0x7FFFFFFF & (~0xFFFULL << kDefaultShadowScale); // < 2G. static const u64 kAArch64_ShadowOffset64 = 1ULL << 36; +static const u64 kRiscv64_ShadowOffset64 = 0x20000000; static const u64 kMIPS32_ShadowOffset32 = 0x0aaa0000; static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37; static const u64 kPPC64_ShadowOffset64 = 1ULL << 44; @@ -208,6 +223,8 @@ static const u64 kMyriadCacheBitMask32 = 0x40000000ULL; # define SHADOW_OFFSET __asan_shadow_memory_dynamic_address # elif SANITIZER_MAC && defined(__aarch64__) # define SHADOW_OFFSET __asan_shadow_memory_dynamic_address +#elif SANITIZER_RISCV64 +#define SHADOW_OFFSET kRiscv64_ShadowOffset64 # elif defined(__aarch64__) # define SHADOW_OFFSET kAArch64_ShadowOffset64 # elif defined(__powerpc64__) diff --git a/compiler-rt/lib/asan/asan_shadow_setup.cpp b/compiler-rt/lib/asan/asan_shadow_setup.cpp index 0e2623a23028e..2ead4425add83 100644 --- a/compiler-rt/lib/asan/asan_shadow_setup.cpp +++ b/compiler-rt/lib/asan/asan_shadow_setup.cpp @@ -44,7 +44,8 @@ static void ProtectGap(uptr addr, uptr size) { } static void MaybeReportLinuxPIEBug() { -#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__aarch64__)) +#if SANITIZER_LINUX && \ + (defined(__x86_64__) || defined(__aarch64__) || SANITIZER_RISCV64) Report("This might be related to ELF_ET_DYN_BASE change in Linux 4.12.\n"); Report( "See https://github.com/google/sanitizers/issues/856 for possible " diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index c8575a984c0c3..040db6fc2a16b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -674,7 +674,8 @@ enum ModuleArch { kModuleArchARMV7, kModuleArchARMV7S, kModuleArchARMV7K, - kModuleArchARM64 + kModuleArchARM64, + kModuleArchRISCV64 }; // Opens the file 'file_name" and reads up to 'max_len' bytes. @@ -718,6 +719,8 @@ inline const char *ModuleArchToString(ModuleArch arch) { return "armv7k"; case kModuleArchARM64: return "arm64"; + case kModuleArchRISCV64: + return "riscv64"; } CHECK(0 && "Invalid module arch"); return ""; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc index 532ac9ead3498..1b89d6e176840 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc @@ -2294,9 +2294,10 @@ PRE_SYSCALL(ni_syscall)() {} POST_SYSCALL(ni_syscall)(long res) {} PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { -#if !SANITIZER_ANDROID && \ - (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) +#if !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ + SANITIZER_RISCV64) if (data) { if (request == ptrace_setregs) { PRE_READ((void *)data, struct_user_regs_struct_sz); @@ -2315,9 +2316,10 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { } POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { -#if !SANITIZER_ANDROID && \ - (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) +#if !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ + SANITIZER_RISCV64) if (res >= 0 && data) { // Note that this is different from the interceptor in // sanitizer_common_interceptors.inc. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 0e48062828a4b..c84946ca9bf29 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -154,6 +154,8 @@ namespace __sanitizer { #if SANITIZER_LINUX && defined(__x86_64__) #include "sanitizer_syscall_linux_x86_64.inc" +#elif SANITIZER_LINUX && SANITIZER_RISCV64 +#include "sanitizer_syscall_linux_riscv64.inc" #elif SANITIZER_LINUX && defined(__aarch64__) #include "sanitizer_syscall_linux_aarch64.inc" #elif SANITIZER_LINUX && defined(__arm__) @@ -712,7 +714,7 @@ struct linux_dirent { }; #else struct linux_dirent { -#if SANITIZER_X32 || defined(__aarch64__) +#if SANITIZER_X32 || defined(__aarch64__) || SANITIZER_RISCV64 u64 d_ino; u64 d_off; #else @@ -720,7 +722,7 @@ struct linux_dirent { unsigned long d_off; #endif unsigned short d_reclen; -#ifdef __aarch64__ +#if defined(__aarch64__) || SANITIZER_RISCV64 unsigned char d_type; #endif char d_name[256]; @@ -1069,6 +1071,8 @@ uptr GetMaxVirtualAddress() { // This should (does) work for both PowerPC64 Endian modes. // Similarly, aarch64 has multiple address space layouts: 39, 42 and 47-bit. return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; +#elif SANITIZER_RISCV64 + return (1ULL << 38) - 1; # elif defined(__mips64) return (1ULL << 40) - 1; // 0x000000ffffffffffUL; # elif defined(__s390x__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index f95f03b089a3a..b8b999363ff26 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -270,7 +270,7 @@ void InitTlsSize() { } #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \ defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \ - defined(__arm__)) && \ + defined(__arm__) || SANITIZER_RISCV64) && \ SANITIZER_LINUX && !SANITIZER_ANDROID // sizeof(struct pthread) from glibc. static atomic_uintptr_t thread_descriptor_size; @@ -310,6 +310,21 @@ uptr ThreadDescriptorSize() { #elif defined(__mips__) // TODO(sagarthakur): add more values as per different glibc versions. val = FIRST_32_SECOND_64(1152, 1776); +#elif SANITIZER_RISCV64 + int major; + int minor; + int patch; + if (GetLibcVersion(&major, &minor, &patch) && major == 2) { + // TODO: consider adding an optional runtime check for an unknown (untested) + // glibc version + if (minor <= 28) // WARNING: the highest tested version is 2.29 + val = 1772; // no guarantees for this one + else if (minor <= 31) + val = 1772; // tested against glibc 2.29, 2.31 + else + val = 1936; // tested against glibc 2.32 + } + #elif defined(__aarch64__) // The sizeof (struct pthread) is the same from GLIBC 2.17 to 2.22. val = 1776; @@ -452,7 +467,7 @@ static void GetTls(uptr *addr, uptr *size) { *addr -= *size; *addr += ThreadDescriptorSize(); #elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__arm__) + defined(__arm__) || SANITIZER_RISCV64 *addr = ThreadSelf(); *size = GetTlsSize(); #else @@ -509,7 +524,7 @@ uptr GetTlsSize() { uptr addr, size; GetTls(&addr, &size); return size; -#elif defined(__mips__) || defined(__powerpc64__) +#elif defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 return RoundUpTo(g_tls_size + TlsPreTcbSize(), 16); #else return g_tls_size; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 43c6e8a03c16d..5547c68bc5170 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -245,6 +245,8 @@ // will still work but will consume more memory for TwoLevelByteMap. #if defined(__mips__) # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 40) +#elif SANITIZER_RISCV64 +#define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 38) #elif defined(__aarch64__) # if SANITIZER_MAC // Darwin iOS/ARM64 has a 36-bit VMA, 64GiB VM diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index c6138e785afe1..25be06c1bc8f6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -15,6 +15,7 @@ #include "sanitizer_glibc_version.h" #include "sanitizer_internal_defs.h" +#include "sanitizer_platform.h" #if SANITIZER_POSIX # define SI_POSIX 1 @@ -278,10 +279,10 @@ #define SANITIZER_INTERCEPT_SYSINFO SI_LINUX #define SANITIZER_INTERCEPT_READDIR SI_POSIX #define SANITIZER_INTERCEPT_READDIR64 SI_LINUX_NOT_ANDROID || SI_SOLARIS32 -#if SI_LINUX_NOT_ANDROID && \ - (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) +#if SI_LINUX_NOT_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ + defined(__s390__) || SANITIZER_RISCV64) #define SANITIZER_INTERCEPT_PTRACE 1 #else #define SANITIZER_INTERCEPT_PTRACE 0 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index c80132bb25ea2..1427cec48c4a3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -90,7 +90,8 @@ #if SANITIZER_LINUX # include # include -# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) +#if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ + SANITIZER_RISCV64 # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; @@ -303,13 +304,16 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); int glob_altdirfunc = GLOB_ALTDIRFUNC; #endif -#if SANITIZER_LINUX && !SANITIZER_ANDROID && \ - (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) +#if SANITIZER_LINUX && !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ + defined(__s390__) || SANITIZER_RISCV64) #if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t); +#elif SANITIZER_RISCV64 + unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct); + unsigned struct_user_fpregs_struct_sz = sizeof(struct __riscv_q_ext_state); #elif defined(__aarch64__) unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); @@ -321,7 +325,8 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); #endif // __mips64 || __powerpc64__ || __aarch64__ #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ - defined(__aarch64__) || defined(__arm__) || defined(__s390__) + defined(__aarch64__) || defined(__arm__) || defined(__s390__) || \ + SANITIZER_RISCV64 unsigned struct_user_fpxregs_struct_sz = 0; #else unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 0f43e18e7f815..e69560ee39e95 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -101,7 +101,7 @@ const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif SANITIZER_RISCV64 const unsigned struct_kernel_stat_sz = 128; -const unsigned struct_kernel_stat64_sz = 104; +const unsigned struct_kernel_stat64_sz = 0; // RISCV64 does not use stat64 #endif struct __sanitizer_perf_event_attr { unsigned type; @@ -804,7 +804,7 @@ typedef void __sanitizer_FILE; #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || SANITIZER_RISCV64) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp index e51609f54a0af..b0487d8987db2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp @@ -10,9 +10,11 @@ // run-time libraries. //===----------------------------------------------------------------------===// +#include "sanitizer_stacktrace.h" + #include "sanitizer_common.h" #include "sanitizer_flags.h" -#include "sanitizer_stacktrace.h" +#include "sanitizer_platform.h" namespace __sanitizer { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h index 9111acce0c60a..d9fd88d90a745 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h @@ -13,6 +13,7 @@ #define SANITIZER_STACKTRACE_H #include "sanitizer_internal_defs.h" +#include "sanitizer_platform.h" namespace __sanitizer { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 242ee159fdef8..6a3c00458efb4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -13,10 +13,10 @@ #include "sanitizer_platform.h" -#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \ - defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__s390__) || defined(__i386__) || \ - defined(__arm__)) +#if SANITIZER_LINUX && \ + (defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \ + defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \ + defined(__arm__) || SANITIZER_RISCV64) #include "sanitizer_stoptheworld.h" @@ -31,7 +31,7 @@ #include // for pid_t #include // for iovec #include // for NT_PRSTATUS -#if defined(__aarch64__) && !SANITIZER_ANDROID +#if (defined(__aarch64__) || SANITIZER_RISCV64) && !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h # include #endif @@ -507,6 +507,12 @@ typedef struct user_pt_regs regs_struct; static constexpr uptr kExtraRegs[] = {0}; #define ARCH_IOVEC_FOR_GETREGSET +#elif SANITIZER_RISCV64 +typedef struct user_regs_struct regs_struct; +#define REG_SP sp +static constexpr uptr kExtraRegs[] = {0}; +#define ARCH_IOVEC_FOR_GETREGSET + #elif defined(__s390__) typedef _user_regs_struct regs_struct; #define REG_SP gprs[15] diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 77522a20ae9b6..311d676439c0b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -12,6 +12,7 @@ #include "sanitizer_allocator_internal.h" #include "sanitizer_internal_defs.h" +#include "sanitizer_platform.h" #include "sanitizer_symbolizer_internal.h" namespace __sanitizer { @@ -258,6 +259,8 @@ class LLVMSymbolizerProcess : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=x86_64"; #elif defined(__i386__) const char* const kSymbolizerArch = "--default-arch=i386"; +#elif SANITIZER_RISCV64 + const char *const kSymbolizerArch = "--default-arch=riscv64"; #elif defined(__aarch64__) const char* const kSymbolizerArch = "--default-arch=arm64"; #elif defined(__arm__) From 1113fbf44c2250621548e278d2a1e11ab2b2d63d Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sun, 4 Oct 2020 14:21:00 +0200 Subject: [PATCH 494/544] [CodeGen] Improve likelihood branch weights Bruno De Fraine discovered some issues with D85091. The branch weights generated for `logical not` and `ternary conditional` were wrong. The `logical and` and `logical or` differed from the code generated of `__builtin_predict`. Adjusted the generated code for the likelihood to match `__builtin_predict`. The patch is based on Bruno's suggestions. Differential Revision: https://reviews.llvm.org/D88363 --- clang/lib/CodeGen/CGStmt.cpp | 29 +-- clang/lib/CodeGen/CodeGenFunction.cpp | 62 +++-- clang/lib/CodeGen/CodeGenFunction.h | 2 +- .../attr-likelihood-if-vs-builtin-expect.cpp | 223 ++++++++++++++++++ 4 files changed, 274 insertions(+), 42 deletions(-) create mode 100644 clang/test/CodeGenCXX/attr-likelihood-if-vs-builtin-expect.cpp diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 83dd1be31633d..c9e6ce2df2c0d 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -27,7 +27,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/SaveAndRestore.h" -#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" using namespace clang; using namespace CodeGen; @@ -652,20 +651,6 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { EmitBranch(IndGotoBB); } -static Optional> -getLikelihoodWeights(const IfStmt &If) { - switch (Stmt::getLikelihood(If.getThen(), If.getElse())) { - case Stmt::LH_Unlikely: - return std::pair(llvm::UnlikelyBranchWeight, - llvm::LikelyBranchWeight); - case Stmt::LH_None: - return None; - case Stmt::LH_Likely: - return std::pair(llvm::LikelyBranchWeight, - llvm::UnlikelyBranchWeight); - } - llvm_unreachable("Unknown Likelihood"); -} void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // C99 6.8.4.1: The first substatement is executed if the expression compares @@ -713,17 +698,11 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // Prefer the PGO based weights over the likelihood attribute. // When the build isn't optimized the metadata isn't used, so don't generate // it. - llvm::MDNode *Weights = nullptr; + Stmt::Likelihood LH = Stmt::LH_None; uint64_t Count = getProfileCount(S.getThen()); - if (!Count && CGM.getCodeGenOpts().OptimizationLevel) { - Optional> LHW = getLikelihoodWeights(S); - if (LHW) { - llvm::MDBuilder MDHelper(CGM.getLLVMContext()); - Weights = MDHelper.createBranchWeights(LHW->first, LHW->second); - } - } - - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, Weights); + if (!Count && CGM.getCodeGenOpts().OptimizationLevel) + LH = Stmt::getLikelihood(S.getThen(), S.getElse()); + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, LH); // Emit the 'then' code. EmitBlock(ThenBlock); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 47ef5c830723e..363b418dc198b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CRC.h" +#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" using namespace clang; using namespace CodeGen; @@ -1477,15 +1478,30 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, return true; } +static Optional> +getLikelihoodWeights(Stmt::Likelihood LH) { + switch (LH) { + case Stmt::LH_Unlikely: + return std::pair(llvm::UnlikelyBranchWeight, + llvm::LikelyBranchWeight); + case Stmt::LH_None: + return None; + case Stmt::LH_Likely: + return std::pair(llvm::LikelyBranchWeight, + llvm::UnlikelyBranchWeight); + } + llvm_unreachable("Unknown Likelihood"); +} + /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an if /// statement) to the specified blocks. Based on the condition, this might try /// to simplify the codegen of the conditional based on the branch. -/// \param Weights The weights determined by the likelihood attributes. +/// \param LH The value of the likelihood attribute on the True branch. void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, - llvm::MDNode *Weights) { + Stmt::Likelihood LH) { Cond = Cond->IgnoreParens(); if (const BinaryOperator *CondBOp = dyn_cast(Cond)) { @@ -1500,7 +1516,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // br(1 && X) -> br(X). incrementProfileCounter(CondBOp); return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, - TrueCount, Weights); + TrueCount, LH); } // If we have "X && 1", simplify the code to use an uncond branch. @@ -1509,7 +1525,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConstantBool) { // br(X && 1) -> br(X). return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, - TrueCount, Weights); + TrueCount, LH); } // Emit the LHS as a conditional. If the LHS conditional is false, we @@ -1522,8 +1538,11 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConditionalEvaluation eval(*this); { ApplyDebugLocation DL(*this, Cond); + // Propagate the likelihood attribute like __builtin_expect + // __builtin_expect(X && Y, 1) -> X and Y are likely + // __builtin_expect(X && Y, 0) -> only Y is unlikely EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount, - Weights); + LH == Stmt::LH_Unlikely ? Stmt::LH_None : LH); EmitBlock(LHSTrue); } @@ -1533,7 +1552,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // Any temporaries created here are conditional. eval.begin(*this); EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount, - Weights); + LH); eval.end(*this); return; @@ -1548,7 +1567,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // br(0 || X) -> br(X). incrementProfileCounter(CondBOp); return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, - TrueCount, Weights); + TrueCount, LH); } // If we have "X || 0", simplify the code to use an uncond branch. @@ -1557,7 +1576,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, !ConstantBool) { // br(X || 0) -> br(X). return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, - TrueCount, Weights); + TrueCount, LH); } // Emit the LHS as a conditional. If the LHS conditional is true, we @@ -1572,9 +1591,12 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConditionalEvaluation eval(*this); { + // Propagate the likelihood attribute like __builtin_expect + // __builtin_expect(X || Y, 1) -> only Y is likely + // __builtin_expect(X || Y, 0) -> both X and Y are unlikely ApplyDebugLocation DL(*this, Cond); EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount, - Weights); + LH == Stmt::LH_Likely ? Stmt::LH_None : LH); EmitBlock(LHSFalse); } @@ -1584,7 +1606,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // Any temporaries created here are conditional. eval.begin(*this); EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount, - Weights); + LH); eval.end(*this); @@ -1597,9 +1619,11 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (CondUOp->getOpcode() == UO_LNot) { // Negate the count. uint64_t FalseCount = getCurrentProfileCount() - TrueCount; + // The values of the enum are chosen to make this negation possible. + LH = static_cast(-LH); // Negate the condition and swap the destination blocks. return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock, - FalseCount, Weights); + FalseCount, LH); } } @@ -1608,9 +1632,11 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true"); llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false"); + // The ConditionalOperator itself has no likelihood information for its + // true and false branches. This matches the behavior of __builtin_expect. ConditionalEvaluation cond(*this); EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, - getProfileCount(CondOp), Weights); + getProfileCount(CondOp), Stmt::LH_None); // When computing PGO branch weights, we only know the overall count for // the true block. This code is essentially doing tail duplication of the @@ -1630,14 +1656,14 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, { ApplyDebugLocation DL(*this, Cond); EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock, - LHSScaledTrueCount, Weights); + LHSScaledTrueCount, LH); } cond.end(*this); cond.begin(*this); EmitBlock(RHSBlock); EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock, - TrueCount - LHSScaledTrueCount, Weights); + TrueCount - LHSScaledTrueCount, LH); cond.end(*this); return; @@ -1666,8 +1692,12 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, } } - // Create branch weights based on the number of times we get here and the - // number of times the condition should be true. + llvm::MDNode *Weights = nullptr; + Optional> LHW = getLikelihoodWeights(LH); + if (LHW) { + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + Weights = MDHelper.createBranchWeights(LHW->first, LHW->second); + } if (!Weights) { uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount); Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 16656de4e8f74..a9fb478647004 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4365,7 +4365,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// evaluate to true based on PGO data. void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, - llvm::MDNode *Weights = nullptr); + Stmt::Likelihood LH = Stmt::LH_None); /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is /// nonnull, if \p LHS is marked _Nonnull. diff --git a/clang/test/CodeGenCXX/attr-likelihood-if-vs-builtin-expect.cpp b/clang/test/CodeGenCXX/attr-likelihood-if-vs-builtin-expect.cpp new file mode 100644 index 0000000000000..5e73cd096742c --- /dev/null +++ b/clang/test/CodeGenCXX/attr-likelihood-if-vs-builtin-expect.cpp @@ -0,0 +1,223 @@ +// RUN: %clang_cc1 -O1 -emit-llvm %s -o - -triple=x86_64-linux-gnu | FileCheck %s + +// Verifies the output of __builtin_expect versus the output of the likelihood +// attributes. They should generate the same probabilities for the branches. + +extern bool a(); +extern bool b(); +extern bool c(); + +void ab1(int &i) { + // CHECK-LABEL: define{{.*}}ab1 + // CHECK: br {{.*}} !prof !2 + // CHECK: br {{.*}} !prof !2 + // CHECK: br {{.*}} !prof !2 + if (__builtin_expect(a() && b() && a(), 1)) { + ++i; + } else { + --i; + } +} + +void al(int &i) { + // CHECK-LABEL: define{{.*}}al + // CHECK: br {{.*}} !prof !2 + // CHECK: br {{.*}} !prof !2 + // CHECK: br {{.*}} !prof !2 + if (a() && b() && c()) [[likely]] { + ++i; + } else { + --i; + } +} + +void ab0(int &i) { + // CHECK-LABEL: define{{.*}}ab0 + // CHECK: br {{.*}}else{{$}} + // CHECK: br {{.*}}else{{$}} + // CHECK: br {{.*}} !prof !8 + if (__builtin_expect(a() && b() && c(), 0)) { + ++i; + } else { + --i; + } +} + +void au(int &i) { + // CHECK-LABEL: define{{.*}}au + // CHECK: br {{.*}}else{{$}} + // CHECK: br {{.*}}else{{$}} + // CHECK: br {{.*}} !prof !8 + if (a() && b() && c()) [[unlikely]] { + ++i; + } else { + --i; + } +} + +void ob1(int &i) { + // CHECK-LABEL: define{{.*}}ob1 + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}}rhs{{$}} + // CHECK: br {{.*}} !prof !2 + if (__builtin_expect(a() || b() || a(), 1)) { + i = 0; + } else { + --i; + } +} + +void ol(int &i) { + // CHECK-LABEL: define{{.*}}ol + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}}false2{{$}} + // CHECK: br {{.*}} !prof !2 + if (a() || b() || c()) [[likely]] { + i = 0; + } else { + --i; + } +} + +void ob0(int &i) { + // CHECK-LABEL: define{{.*}}ob0 + // CHECK: br {{.*}} !prof !8 + // CHECK: br {{.*}} !prof !8 + // CHECK: br {{.*}} !prof !8 + if (__builtin_expect(a() || b() || c(), 0)) { + i = 0; + } else { + --i; + } +} + +void ou(int &i) { + // CHECK-LABEL: define{{.*}}ou + // CHECK: br {{.*}} !prof !8 + // CHECK: br {{.*}} !prof !8 + // CHECK: br {{.*}} !prof !8 + if (a() || b() || c()) [[unlikely]] { + i = 0; + } else { + --i; + } +} + +void nb1(int &i) { + // CHECK-LABEL: define{{.*}}nb1 + // CHECK: storemerge{{.*}} !prof !8 + if (__builtin_expect(!a(), 1)) { + ++i; + } else { + --i; + } +} + +void nl(int &i) { + // CHECK-LABEL: define{{.*}}nl + // CHECK: storemerge{{.*}} !prof !8 + if (!a()) [[likely]] { + ++i; + } else { + --i; + } +} + +void nb0(int &i) { + // CHECK-LABEL: define{{.*}}nb0 + // CHECK: storemerge{{.*}} !prof !2 + if (__builtin_expect(!a(), 0)) { + ++i; + } else { + --i; + } +} + +void nu(int &i) { + // CHECK-LABEL: define{{.*}}nu + // CHECK: storemerge{{.*}} !prof !2 + if (!a()) [[unlikely]] { + ++i; + } else { + --i; + } +} + +void tb1(int &i) { + // CHECK-LABEL: define{{.*}}tb1 + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: storemerge{{.*}} !prof !2 + if (__builtin_expect(a() ? b() : c(), 1)) { + ++i; + } else { + --i; + } +} + +void tl(int &i) { + // CHECK-LABEL: define{{.*}}tl + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: storemerge{{.*}} !prof !2 + if (bool d = a() ? b() : c()) [[likely]] { + ++i; + } else { + --i; + } +} + +void tl2(int &i) { + // CHECK-LABEL: define{{.*}}tl + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}} !prof !2 + // CHECK: br {{.*}} !prof !2 + if (a() ? b() : c()) [[likely]] { + ++i; + } else { + --i; + } +} + +void tb0(int &i) { + // CHECK-LABEL: define{{.*}}tb0 + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: storemerge{{.*}} !prof !8 + if (__builtin_expect(a() ? b() : c(), 0)) { + ++i; + } else { + --i; + } +} + +void tu(int &i) { + // CHECK-LABEL: define{{.*}}tu + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: br {{.*}}end{{$}} + // CHECK: storemerge{{.*}} !prof !8 + if (bool d = a() ? b() : c()) [[unlikely]] { + ++i; + } else { + --i; + } +} + +void tu2(int &i) { + // CHECK-LABEL: define{{.*}}tu + // CHECK: br {{.*}}false{{$}} + // CHECK: br {{.*}} !prof !8 + // CHECK: br {{.*}} !prof !8 + if (a() ? b() : c()) [[unlikely]] { + ++i; + } else { + --i; + } +} + +// CHECK: !2 = !{!"branch_weights", i32 2000, i32 1} +// CHECK: !8 = !{!"branch_weights", i32 1, i32 2000} From a566f0525a692e6e696add6f369edab979d6f300 Mon Sep 17 00:00:00 2001 From: Anatoly Parshintsev Date: Sun, 4 Oct 2020 15:26:50 +0300 Subject: [PATCH 495/544] [RISCV][ASAN] instrumentation pass now uses proper shadow offset [10/11] patch series to port ASAN for riscv64 Depends On D87580 Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D87581 --- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 5902a2eb8374c..2d4e94386e45e 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -105,6 +105,7 @@ static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; +static const uint64_t kRISCV64_ShadowOffset64 = 0x20000000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30; @@ -447,6 +448,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsMIPS64 = TargetTriple.isMIPS64(); bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; bool IsWindows = TargetTriple.isOSWindows(); bool IsFuchsia = TargetTriple.isOSFuchsia(); bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad; @@ -515,6 +517,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, Mapping.Offset = kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; + else if (IsRISCV64) + Mapping.Offset = kRISCV64_ShadowOffset64; else Mapping.Offset = kDefaultShadowOffset64; } @@ -533,6 +537,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, // we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU && + !IsRISCV64 && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; bool IsAndroidWithIfuncSupport = From e4e5c42896df5ed61a98926ea42f5b1ab734e1c4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 4 Oct 2020 15:32:05 +0100 Subject: [PATCH 496/544] [X86][SSE] isTargetShuffleEquivalent - ensure shuffle inputs are the correct size. Preliminary patch for the next stage of PR45974 - we don't want to be creating 'padded' vectors on-the-fly at all in combineX86ShufflesRecursively, and only pad the source inputs if we have a definite match inside combineX86ShuffleChain. This means that the inputs to combineX86ShuffleChain might soon be smaller than the final root value type, so we should ensure that isTargetShuffleEquivalent only matches with the inputs if they are the correct size. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 64 ++++++++++++++----------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 13de7bb75b8ab..9b5412c945ff8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10930,7 +10930,7 @@ static bool isShuffleEquivalent(ArrayRef Mask, ArrayRef ExpectedMask, /// /// SM_SentinelZero is accepted as a valid negative index but must match in /// both. -static bool isTargetShuffleEquivalent(ArrayRef Mask, +static bool isTargetShuffleEquivalent(MVT VT, ArrayRef Mask, ArrayRef ExpectedMask, SDValue V1 = SDValue(), SDValue V2 = SDValue()) { @@ -10944,6 +10944,12 @@ static bool isTargetShuffleEquivalent(ArrayRef Mask, if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size)) return false; + // Don't use V1/V2 if they're not the same size as the shuffle mask type. + if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits()) + V1 = SDValue(); + if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits()) + V2 = SDValue(); + for (int i = 0; i < Size; ++i) { int MaskIdx = Mask[i]; int ExpectedIdx = ExpectedMask[i]; @@ -11002,8 +11008,8 @@ static bool isUnpackWdShuffleMask(ArrayRef Mask, MVT VT) { SmallVector Unpckhwd; createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false, /* Unary = */ false); - bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) || - isTargetShuffleEquivalent(Mask, Unpckhwd)); + bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) || + isTargetShuffleEquivalent(VT, Mask, Unpckhwd)); return IsUnpackwdMask; } @@ -11020,8 +11026,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef Mask) { for (unsigned i = 0; i != 4; ++i) { SmallVector UnpackMask; createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); - if (isTargetShuffleEquivalent(Mask, UnpackMask) || - isTargetShuffleEquivalent(CommutedMask, UnpackMask)) + if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) || + isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask)) return true; } return false; @@ -11214,7 +11220,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // Attempt to match the target mask against the unpack lo/hi mask patterns. SmallVector Unpckl, Unpckh; createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary); - if (isTargetShuffleEquivalent(TargetMask, Unpckl)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) { UnpackOpcode = X86ISD::UNPCKL; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); @@ -11222,7 +11228,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, } createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary); - if (isTargetShuffleEquivalent(TargetMask, Unpckh)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) { UnpackOpcode = X86ISD::UNPCKH; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); @@ -11260,14 +11266,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // If a binary shuffle, commute and try again. if (!IsUnary) { ShuffleVectorSDNode::commuteMask(Unpckl); - if (isTargetShuffleEquivalent(TargetMask, Unpckl)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) { UnpackOpcode = X86ISD::UNPCKL; std::swap(V1, V2); return true; } ShuffleVectorSDNode::commuteMask(Unpckh); - if (isTargetShuffleEquivalent(TargetMask, Unpckh)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) { UnpackOpcode = X86ISD::UNPCKH; std::swap(V1, V2); return true; @@ -11638,14 +11644,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, // Try binary shuffle. SmallVector BinaryMask; createPackShuffleMask(VT, BinaryMask, false, NumStages); - if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2)) + if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2)) if (MatchPACK(V1, V2, PackVT)) return true; // Try unary shuffle. SmallVector UnaryMask; createPackShuffleMask(VT, UnaryMask, true, NumStages); - if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1)) + if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1)) if (MatchPACK(V1, V1, PackVT)) return true; } @@ -34522,17 +34528,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef Mask, // instructions are no slower than UNPCKLPD but has the option to // fold the input operand into even an unaligned memory load. if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) { - if (isTargetShuffleEquivalent(Mask, {0, 0}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v2f64; return true; } - if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v4f32; return true; @@ -34541,17 +34547,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef Mask, if (MaskVT.is256BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles"); - if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v4f64; return true; } - if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v8f32; return true; } - if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v8f32; return true; @@ -34561,19 +34567,21 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef Mask, if (MaskVT.is512BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX512() && "AVX512 required for 512-bit vector shuffles"); - if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v8f64; return true; } if (isTargetShuffleEquivalent( - Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) { + MaskVT, Mask, + {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v16f32; return true; } if (isTargetShuffleEquivalent( - Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) { + MaskVT, Mask, + {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v16f32; return true; @@ -34732,27 +34740,27 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef Mask, unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); if (MaskVT.is128BitVector()) { - if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) { V2 = V1; V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1); Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS; SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) { V2 = V1; Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS; SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() && - (AllowFloatDomain || !Subtarget.hasSSE41())) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) && + Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) { std::swap(V1, V2); Shuffle = X86ISD::MOVSD; SrcVT = DstVT = MVT::v2f64; return true; } - if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) && (AllowFloatDomain || !Subtarget.hasSSE41())) { Shuffle = X86ISD::MOVSS; SrcVT = DstVT = MVT::v4f32; @@ -35325,7 +35333,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // from a scalar. // TODO: Handle other insertions here as well? if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 && - Subtarget.hasSSE41() && !isTargetShuffleEquivalent(Mask, {4, 1, 2, 3})) { + Subtarget.hasSSE41() && + !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) { if (MaskEltSizeInBits == 32) { SDValue SrcV1 = V1, SrcV2 = V2; if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, @@ -35340,7 +35349,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return DAG.getBitcast(RootVT, Res); } } - if (MaskEltSizeInBits == 64 && isTargetShuffleEquivalent(Mask, {0, 2}) && + if (MaskEltSizeInBits == 64 && + isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) && V2.getOpcode() == ISD::SCALAR_TO_VECTOR && V2.getScalarValueSizeInBits() <= 32) { if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) From 2c48dd7c3ac5f8a0287d1fc7455d45d755d664aa Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 4 Oct 2020 16:59:43 +0200 Subject: [PATCH 497/544] [MemCpyOpt] Add additional call slot tests (NFC) The case of a destination read between call and memcpy was not covered anywhere (but is handled correctly). However, a potentially throwing call between the call and the memcpy appears to be miscompiled. --- llvm/test/Transforms/MemCpyOpt/callslot.ll | 91 ++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 llvm/test/Transforms/MemCpyOpt/callslot.ll diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll new file mode 100644 index 0000000000000..4b65fbcf88c23 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -memcpyopt < %s | FileCheck %s + +define i8 @read_dest_between_call_and_memcpy() { +; CHECK-LABEL: @read_dest_between_call_and_memcpy( +; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: store i8 1, i8* [[DEST]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[DEST]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret i8 [[X]] +; + %dest = alloca i8, i64 16 + %src = alloca i8, i64 16 + store i8 1, i8* %dest + call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) + %x = load i8, i8* %dest + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + ret i8 %x +} + +define i8 @read_src_between_call_and_memcpy() { +; CHECK-LABEL: @read_src_between_call_and_memcpy( +; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[SRC]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i1 false) +; CHECK-NEXT: ret i8 [[X]] +; + %dest = alloca i8, i64 16 + %src = alloca i8, i64 16 + call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) + %x = load i8, i8* %src + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + ret i8 %x +} + +define void @write_dest_between_call_and_memcpy() { +; CHECK-LABEL: @write_dest_between_call_and_memcpy( +; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, i8* [[DEST]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; + %dest = alloca i8, i64 16 + %src = alloca i8, i64 16 + call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) + store i8 1, i8* %dest + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + ret void +} + +define void @write_src_between_call_and_memcpy() { +; CHECK-LABEL: @write_src_between_call_and_memcpy( +; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, i8* [[SRC]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i1 false) +; CHECK-NEXT: ret void +; + %dest = alloca i8, i64 16 + %src = alloca i8, i64 16 + call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) + store i8 1, i8* %src + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + ret void +} + +; TODO: This is a miscompile. +define void @throw_between_call_and_mempy(i8* dereferenceable(16) %dest) { +; CHECK-LABEL: @throw_between_call_and_mempy( +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @may_throw() [[ATTR2:#.*]] +; CHECK-NEXT: ret void +; + %src = alloca i8, i64 16 + call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) + call void @may_throw() readnone + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + ret void +} + +declare void @may_throw() +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) From 2ccbf3dbd5bac9d4fea8b67404b4c6b006d4adbe Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 4 Oct 2020 11:07:27 -0400 Subject: [PATCH 498/544] [SDAG] fold x * 0.0 at node creation time In the motivating case from https://llvm.org/PR47517 we create a node that does not get constant folded before getNegatedExpression is attempted from some other node, and we crash. By moving the fold into SelectionDAG::simplifyFPBinop(), we get the constant fold sooner and avoid the problem. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ------- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 +++++ .../CodeGen/ARM/softfp-constant-comparison.ll | 15 ++------------- llvm/test/CodeGen/X86/fmul-combines.ll | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0b6aca4ca34c8..9df930a6e3ba9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13040,13 +13040,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) || - (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { - // fold (fmul A, 0) -> 0 - if (N1CFP && N1CFP->isZero()) - return N1; - } - if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 if (isConstantFPBuildVectorOrConstantFP(N1) && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index eef467d116b7f..62d01fbf96cdf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7475,6 +7475,11 @@ SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, if (YC->getValueAPF().isExactlyValue(1.0)) return X; + // X * 0.0 --> 0.0 + if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros()) + if (YC->getValueAPF().isZero()) + return getConstantFP(0.0, SDLoc(Y), Y.getValueType()); + return SDValue(); } diff --git a/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll b/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll index 0b4e42843cba5..e076e75e8066d 100644 --- a/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll +++ b/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll @@ -9,24 +9,13 @@ target triple = "thumbv7em-arm-none-eabi" define hidden void @fn1() nounwind #0 { ; CHECK-LABEL: fn1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vldr d0, .LCPI0_0 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: bl __aeabi_dcmpeq +; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: cbnz r0, .LBB0_2 ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_1: @ %land.rhs ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_2: @ %land.end -; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI0_0: -; CHECK-NEXT: .long 0 @ double 0 -; CHECK-NEXT: .long 0 +; CHECK-NEXT: bx lr entry: %0 = load i32, i32* @a, align 4 %conv = sitofp i32 %0 to double diff --git a/llvm/test/CodeGen/X86/fmul-combines.ll b/llvm/test/CodeGen/X86/fmul-combines.ll index 0c8a822e7ff76..f9a3e75c3d783 100644 --- a/llvm/test/CodeGen/X86/fmul-combines.ll +++ b/llvm/test/CodeGen/X86/fmul-combines.ll @@ -261,3 +261,22 @@ define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) { %mul = fmul <4 x float> %x.neg, %y.neg ret <4 x float> %mul } + +; PR47517 - this could crash if we create 'fmul x, 0.0' nodes +; that do not constant fold in a particular order. + +define float @getNegatedExpression_crash(float* %p) { +; CHECK-LABEL: getNegatedExpression_crash: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $0, (%rdi) +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + store float 0.0, float* %p, align 1 + %real = load float, float* %p, align 1 + %r2 = fmul fast float %real, %real + %t1 = fmul fast float %real, 42.0 + %t2 = fmul fast float %real, %t1 + %mul_ac56 = fmul fast float %t2, %t1 + %mul_ac72 = fmul fast float %r2, %mul_ac56 + ret float %mul_ac72 +} From e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Sun, 4 Oct 2020 16:24:20 +0000 Subject: [PATCH 499/544] [PowerPC] Add builtins for xvtdiv(dp|sp) and xvtsqrt(dp|sp). Summary: This patch implements the builtins for xvtdivdp, xvtdivsp, xvtsqrtdp, xvtsqrtsp. The instructions correspond to the following builtins: int vec_test_swdiv(vector double v1, vector double v2); int vec_test_swdivs(vector float v1, vector float v2); int vec_test_swsqrt(vector double v1); int vec_test_swsqrts(vector float v1); This patch depends on D88274, which fixes the bug in copying from CRRC to GPRC/G8RC. Reviewed By: steven.zhang, amyk Differential Revision: https://reviews.llvm.org/D88278 --- clang/include/clang/Basic/BuiltinsPPC.def | 5 +++ clang/lib/Headers/altivec.h | 26 ++++++++++++ clang/test/CodeGen/builtins-ppc-vsx.c | 18 ++++++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 10 +++++ llvm/lib/Target/PowerPC/PPCInstrVSX.td | 10 +++++ llvm/test/CodeGen/PowerPC/vsx_builtins.ll | 52 +++++++++++++++++++++++ 6 files changed, 121 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 29bce799c8f41..015411abc5082 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -558,6 +558,11 @@ BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "") BUILTIN(__builtin_vsx_xvtlsbb, "iV16UcUi", "") +BUILTIN(__builtin_vsx_xvtdivdp, "iV2dV2d", "") +BUILTIN(__builtin_vsx_xvtdivsp, "iV4fV4f", "") +BUILTIN(__builtin_vsx_xvtsqrtdp, "iV2d", "") +BUILTIN(__builtin_vsx_xvtsqrtsp, "iV4f", "") + // P10 Vector Permute Extended built-in. BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 572b8863dd1af..1d7bc201d3307 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -3504,6 +3504,20 @@ vec_div(vector signed __int128 __a, vector signed __int128 __b) { } #endif __POWER10_VECTOR__ +/* vec_xvtdiv */ + +#ifdef __VSX__ +static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a, + vector double __b) { + return __builtin_vsx_xvtdivdp(__a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a, + vector float __b) { + return __builtin_vsx_xvtdivsp(__a, __b); +} +#endif + /* vec_dss */ #define vec_dss __builtin_altivec_dss @@ -8057,6 +8071,18 @@ vec_vrsqrtefp(vector float __a) { return __builtin_altivec_vrsqrtefp(__a); } +/* vec_xvtsqrt */ + +#ifdef __VSX__ +static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) { + return __builtin_vsx_xvtsqrtdp(__a); +} + +static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) { + return __builtin_vsx_xvtsqrtsp(__a); +} +#endif + /* vec_sel */ #define __builtin_altivec_vsel_4si vec_sel diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index 2542b30590bf8..d99b0c1e8f413 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -52,6 +52,7 @@ vector unsigned long long res_vull; vector signed __int128 res_vslll; double res_d; +int res_i; float res_af[4]; double res_ad[2]; signed char res_asc[16]; @@ -878,6 +879,23 @@ void test1() { // CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}}) + res_i = vec_test_swsqrt(vd); +// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}}) + + res_i = vec_test_swsqrts(vf); +// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}}) + + res_i = vec_test_swdiv(vd, vd); +// CHECK: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}) + + res_i = vec_test_swdivs(vf, vf); +// CHECK: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) +// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}) + + dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 7b11555296a48..7ab4ee301bb59 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1249,6 +1249,16 @@ def int_ppc_vsx_xxinsertw : def int_ppc_vsx_xvtlsbb : PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtdivdp : + PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtdivsp : + PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtsqrtdp : + PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtsqrtsp : + PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xxeval : PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index f4612b9dfd312..18ed2cca0f025 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2591,6 +2591,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, $B)>; +// Vector test for software divide and sqrt. +def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)), + (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)), + (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)), + (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>; +def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)), + (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>; + // Reciprocal estimate def : Pat<(int_ppc_vsx_xvresp v4f32:$A), (XVRESP $A)>; diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll index b386565500f62..2ab747384b698 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll @@ -54,3 +54,55 @@ define void @test4(<2 x double> %a, i8* %b) { } ; Function Attrs: nounwind readnone declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*) + +define i32 @test_vec_test_swdiv(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: test_vec_test_swdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtdivdp cr0, v2, v3 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtdivdp(<2 x double>, <2 x double>) + +define i32 @test_vec_test_swdivs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vec_test_swdivs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtdivsp cr0, v2, v3 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %a, <4 x float> %b) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtdivsp(<4 x float>, <4 x float>) + +define i32 @test_vec_test_swsqrt(<2 x double> %a) { +; CHECK-LABEL: test_vec_test_swsqrt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtsqrtdp cr0, v2 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %a) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>) + +define i32 @test_vec_test_swsqrts(<4 x float> %a) { +; CHECK-LABEL: test_vec_test_swsqrts: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvtsqrtsp cr0, v2 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: blr + entry: + %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %a) + ret i32 %0 +} +declare i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float>) From aaae13d0c29ec2a20f93e6adb9d9b5c2656d2af6 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 4 Oct 2020 19:50:30 +0300 Subject: [PATCH 500/544] [NFC][clang][codegen] Autogenerate a few ARM SVE tests that are being affected by an upcoming patch --- .../attr-arm-sve-vector-bits-bitcast.c | 96 +++++++-------- .../CodeGen/attr-arm-sve-vector-bits-call.c | 112 +++++++++--------- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 48 ++++---- .../attr-arm-sve-vector-bits-globals.c | 48 ++++---- 4 files changed, 152 insertions(+), 152 deletions(-) diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index 84559e9edb9a3..3a5628d7f57e4 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -31,21 +31,21 @@ DEFINE_STRUCT(bool) // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i64>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_int64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i64>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_int64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i64>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] // CHECK-512-NEXT: ret [[TMP1]] // svint64_t read_int64(struct struct_int64 *s) { @@ -55,31 +55,31 @@ svint64_t read_int64(struct struct_int64 *s) { // CHECK-128-LABEL: @write_int64( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]] +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x i64>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, [[TBAA6]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_int64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]] +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x i64>* -// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 16, [[TBAA6]] // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-256-NEXT: store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-256-NEXT: store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_int64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]] +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x i64>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, [[TBAA6]] // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_int64(struct struct_int64 *s, svint64_t x) { @@ -94,21 +94,21 @@ void write_int64(struct struct_int64 *s, svint64_t x) { // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x double>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_float64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x double>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_float64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6]] // CHECK-512-NEXT: ret [[TMP1]] // svfloat64_t read_float64(struct struct_float64 *s) { @@ -118,31 +118,31 @@ svfloat64_t read_float64(struct struct_float64 *s) { // CHECK-128-LABEL: @write_float64( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x double>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 16, [[TBAA6]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-128-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_float64( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x double>* -// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 16, [[TBAA6]] // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-256-NEXT: store <4 x double> [[TMP1]], <4 x double>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-256-NEXT: store <4 x double> [[TMP1]], <4 x double>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_float64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x double>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, [[TBAA6]] // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-512-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-512-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_float64(struct struct_float64 *s, svfloat64_t x) { @@ -157,21 +157,21 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) { // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_bfloat16( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <16 x bfloat>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_bfloat16( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA6]] // CHECK-512-NEXT: ret [[TMP1]] // svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { @@ -181,31 +181,31 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { // CHECK-128-LABEL: @write_bfloat16( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x bfloat>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, [[TBAA6]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_bfloat16( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <16 x bfloat>* -// CHECK-256-NEXT: [[TMP1:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[TMP0]], align 16, [[TBAA6]] // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-256-NEXT: store <16 x bfloat> [[TMP1]], <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-256-NEXT: store <16 x bfloat> [[TMP1]], <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_bfloat16( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <32 x bfloat>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, [[TBAA6]] // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA2]] +// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { @@ -220,21 +220,21 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i8>* [[ARRAYIDX]] to * -// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA6]] // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_bool( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i8>* [[ARRAYIDX]] to * -// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA6]] // CHECK-256-NEXT: ret [[TMP1]] // // CHECK-512-LABEL: @read_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[ARRAYIDX]] to * -// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, [[TBAA6]] // CHECK-512-NEXT: ret [[TMP1]] // svbool_t read_bool(struct struct_bool *s) { @@ -244,33 +244,33 @@ svbool_t read_bool(struct struct_bool *s) { // CHECK-128-LABEL: @write_bool( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x i8>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]] // CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 -// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, [[TBAA2]] +// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_bool( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]] // CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i32* -// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA2]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA6]] // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 // CHECK-256-NEXT: [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32* -// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA2]] +// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA6]] // CHECK-256-NEXT: ret void // // CHECK-512-LABEL: @write_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA11:!tbaa !.*]] +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i64* -// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA2]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]] // CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 // CHECK-512-NEXT: [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64* -// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA2]] +// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA6]] // CHECK-512-NEXT: ret void // void write_bool(struct struct_bool *s, svbool_t x) { diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c index 1c08e46681fbc..5442d58e96bea 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -30,13 +30,13 @@ svint32_t sizeless_callee(svint32_t x) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[X]] to * // CHECK-NEXT: store [[X_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, [[TBAA2:!tbaa !.*]] -// CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[X_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, [[TBAA6:!tbaa !.*]] +// CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[X_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[X_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] -// CHECK-NEXT: store [[TMP2]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5:!tbaa !.*]] +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] +// CHECK-NEXT: store [[TMP2]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -52,7 +52,7 @@ fixed_int32_t fixed_caller(fixed_int32_t x) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[X]] to * // CHECK-NEXT: store [[X_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, [[TBAA2]] +// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -68,19 +68,19 @@ fixed_int32_t fixed_callee(fixed_int32_t x) { // CHECK-NEXT: [[COERCE_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[COERCE1:%.*]] = alloca <16 x i32>, align 16 // CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA5]] +// CHECK-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, [[TBAA9]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <16 x i32>* -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA6]] // CHECK-NEXT: [[COERCE_0__SROA_CAST:%.*]] = bitcast * [[COERCE_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[COERCE_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP2:%.*]] = load , * [[COERCE_COERCE]], align 16 // CHECK-NEXT: [[CALL:%.*]] = call @fixed_callee( [[TMP2]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32>* [[COERCE1]] to * // CHECK-NEXT: store [[CALL]], * [[TMP3]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[TMP4]], <16 x i32>* [[SAVED_CALL_RVALUE]], align 64, [[TBAA2]] +// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[TMP4]], <16 x i32>* [[SAVED_CALL_RVALUE]], align 64, [[TBAA6]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <16 x i32>* [[SAVED_CALL_RVALUE]] to * -// CHECK-NEXT: [[TMP5:%.*]] = load , * [[CASTFIXEDSVE]], align 64, [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[CASTFIXEDSVE]], align 64, [[TBAA6]] // CHECK-NEXT: ret [[TMP5]] // svint32_t sizeless_caller(svint32_t x) { @@ -101,21 +101,21 @@ svint32_t sizeless_caller(svint32_t x) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[OP2]] to * // CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 -// CHECK-NEXT: [[OP22:%.*]] = load <16 x i32>, <16 x i32>* [[OP2]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[OP22]], <16 x i32>* [[OP2_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP22:%.*]] = load <16 x i32>, <16 x i32>* [[OP2]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[OP22]], <16 x i32>* [[OP2_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP6]], [[TMP3]], [[TMP5]]) -// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5]] +// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP8]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -135,21 +135,21 @@ fixed_int32_t call_int32_ff(svbool_t pg, fixed_int32_t op1, fixed_int32_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[OP2]] to * // CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 -// CHECK-NEXT: [[OP22:%.*]] = load <8 x double>, <8 x double>* [[OP2]], align 16, [[TBAA2]] -// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, [[TBAA2]] -// CHECK-NEXT: store <8 x double> [[OP22]], <8 x double>* [[OP2_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP22:%.*]] = load <8 x double>, <8 x double>* [[OP2]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x double> [[OP22]], <8 x double>* [[OP2_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x double>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP6]], [[TMP3]], [[TMP5]]) -// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* -// CHECK-NEXT: [[TMP8:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP8]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -170,23 +170,23 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* -// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to * // CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP2]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64* -// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA2]] +// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA2]] +// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP7:%.*]] = load , * [[TMP6]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP7:%.*]] = load , * [[TMP6]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to * -// CHECK-NEXT: [[TMP9:%.*]] = load , * [[TMP8]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[TMP8]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP10:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP7]], [[TMP9]]) -// CHECK-NEXT: store [[TMP10]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-NEXT: store [[TMP10]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-NEXT: [[TMP11:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP13:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP12]], i64* [[TMP13]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -208,15 +208,15 @@ fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP3]], [[TMP2]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5]] +// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP5]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP6:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -234,15 +234,15 @@ fixed_int32_t call_int32_fs(svbool_t pg, fixed_int32_t op1, svint32_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, [[TBAA2]] -// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP3]], [[TMP2]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA7]] +// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA11]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP5]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP6:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -261,15 +261,15 @@ fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * // CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* -// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* -// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA2]] +// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP4]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP5]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] +// CHECK-NEXT: store [[TMP5]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]] // CHECK-NEXT: [[TMP6:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP8:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP7]], i64* [[TMP8]], align 16 // CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -289,9 +289,9 @@ fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA5]] +// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* -// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -307,9 +307,9 @@ fixed_int32_t call_int32_ss(svbool_t pg, svint32_t op1, svint32_t op2) { // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA7]] +// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA11]] // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP2]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -324,9 +324,9 @@ fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { // CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 // CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: store [[TMP0]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA9]] +// CHECK-NEXT: store [[TMP0]], * [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP2]], i64* [[TMP3]], align 16 // CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index e65537cead104..17267d6038e49 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -17,10 +17,10 @@ typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8))); // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, [[TBAA2:!tbaa !.*]] -// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[TYPE_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, [[TBAA6:!tbaa !.*]] +// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[TYPE_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: ret [[TMP2]] // svint32_t to_svint32_t(fixed_int32_t type) { @@ -31,9 +31,9 @@ svint32_t to_svint32_t(fixed_int32_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA5:!tbaa !.*]] +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA9:!tbaa !.*]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <16 x i32>* -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -49,10 +49,10 @@ fixed_int32_t from_svint32_t(svint32_t type) { // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <8 x double>, align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x double>, <8 x double>* [[TYPE]], align 16, [[TBAA2]] -// CHECK-NEXT: store <8 x double> [[TYPE1]], <8 x double>* [[TYPE_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x double>, <8 x double>* [[TYPE]], align 16, [[TBAA6]] +// CHECK-NEXT: store <8 x double> [[TYPE1]], <8 x double>* [[TYPE_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: ret [[TMP2]] // svfloat64_t to_svfloat64_t(fixed_float64_t type) { @@ -63,9 +63,9 @@ svfloat64_t to_svfloat64_t(fixed_float64_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <8 x double>* -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -82,11 +82,11 @@ fixed_float64_t from_svfloat64_t(svfloat64_t type) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64* -// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64* -// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA2]] +// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, [[TBAA6]] // CHECK-NEXT: ret [[TMP4]] // svbool_t to_svbool_t(fixed_bool_t type) { @@ -97,9 +97,9 @@ svbool_t to_svbool_t(fixed_bool_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to i64* -// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* // CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 16 // CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -112,10 +112,10 @@ fixed_bool_t from_svbool_t(svbool_t type) { // CHECK-LABEL: @to_svint32_t__from_gnu_int32_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 16 -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[TYPE]], <16 x i32>* [[TYPE_ADDR]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[TYPE]], <16 x i32>* [[TYPE_ADDR]], align 16, [[TBAA6]] // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[TYPE_ADDR]] to * -// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, [[TBAA6]] // CHECK-NEXT: ret [[TMP2]] // svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { @@ -125,10 +125,10 @@ svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { // CHECK-LABEL: @from_svint32_t__to_gnu_int32_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA5]] +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, [[TBAA9]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <16 x i32>* -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]] // CHECK-NEXT: ret void // gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { @@ -138,7 +138,7 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) { // CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0:%.*]], align 16, [[TBAA6]] // CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* // CHECK-NEXT: store <16 x i32> [[TYPE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = load , * [[RETVAL_COERCE]], align 16 @@ -153,8 +153,8 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) { // CHECK-NEXT: [[TYPE:%.*]] = alloca <16 x i32>, align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[TYPE]] to * // CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 -// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, [[TBAA2]] -// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA2]] +// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, [[TBAA6]] +// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]] // CHECK-NEXT: ret void // gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) { diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index 28464ed4af2b7..5babb9c7c410b 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -22,19 +22,19 @@ fixed_bool_t global_bool; // CHECK-128-LABEL: @write_global_i64( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA6:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <2 x i64>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] -// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* @global_i64, align 16, [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, [[TBAA10:!tbaa !.*]] +// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* @global_i64, align 16, [[TBAA10]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: @write_global_i64( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA2:!tbaa !.*]] +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA6:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x i64>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, [[TBAA6:!tbaa !.*]] -// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* @global_i64, align 16, [[TBAA6]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, [[TBAA10:!tbaa !.*]] +// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* @global_i64, align 16, [[TBAA10]] // CHECK-512-NEXT: ret void // void write_global_i64(svint64_t v) { global_i64 = v; } @@ -42,19 +42,19 @@ void write_global_i64(svint64_t v) { global_i64 = v; } // CHECK-128-LABEL: @write_global_bf16( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x bfloat>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, [[TBAA6]] -// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* @global_bf16, align 16, [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, [[TBAA10]] +// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* @global_bf16, align 16, [[TBAA10]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: @write_global_bf16( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA7:!tbaa !.*]] +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA11:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <32 x bfloat>* -// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, [[TBAA6]] -// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* @global_bf16, align 16, [[TBAA6]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, [[TBAA10]] +// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* @global_bf16, align 16, [[TBAA10]] // CHECK-512-NEXT: ret void // void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } @@ -62,19 +62,19 @@ void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } // CHECK-128-LABEL: @write_global_bool( // CHECK-128-NEXT: entry: // CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <2 x i8>* -// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]] -// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA10]] +// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, [[TBAA10]] // CHECK-128-NEXT: ret void // // CHECK-512-LABEL: @write_global_bool( // CHECK-512-NEXT: entry: // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 -// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]] +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, [[TBAA13:!tbaa !.*]] // CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to i64* -// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]] -// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA6]] +// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA10]] +// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA10]] // CHECK-512-NEXT: ret void // void write_global_bool(svbool_t v) { global_bool = v; } @@ -85,36 +85,36 @@ void write_global_bool(svbool_t v) { global_bool = v; } // CHECK-128-LABEL: @read_global_i64( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i64>* @global_i64 to *), align 16, [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i64>* @global_i64 to *), align 16, [[TBAA10]] // CHECK-128-NEXT: ret [[TMP0]] // // CHECK-512-LABEL: @read_global_i64( // CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i64>* @global_i64 to *), align 16, [[TBAA6]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i64>* @global_i64 to *), align 16, [[TBAA10]] // CHECK-512-NEXT: ret [[TMP0]] // svint64_t read_global_i64() { return global_i64; } // CHECK-128-LABEL: @read_global_bf16( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x bfloat>* @global_bf16 to *), align 16, [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x bfloat>* @global_bf16 to *), align 16, [[TBAA10]] // CHECK-128-NEXT: ret [[TMP0]] // // CHECK-512-LABEL: @read_global_bf16( // CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<32 x bfloat>* @global_bf16 to *), align 16, [[TBAA6]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<32 x bfloat>* @global_bf16 to *), align 16, [[TBAA10]] // CHECK-512-NEXT: ret [[TMP0]] // svbfloat16_t read_global_bf16() { return global_bf16; } // CHECK-128-LABEL: @read_global_bool( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i8>* @global_bool to *), align 2, [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i8>* @global_bool to *), align 2, [[TBAA10]] // CHECK-128-NEXT: ret [[TMP0]] // // CHECK-512-LABEL: @read_global_bool( // CHECK-512-NEXT: entry: -// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i8>* @global_bool to *), align 2, [[TBAA6]] +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i8>* @global_bool to *), align 2, [[TBAA10]] // CHECK-512-NEXT: ret [[TMP0]] // svbool_t read_global_bool() { return global_bool; } From f5fe7abe8a8c8150b7b305bae963b429f15ea217 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 3 Oct 2020 17:39:35 +0100 Subject: [PATCH 501/544] [VPlan] Account for removed users in replaceAllUsesWith. Make sure we do not iterate using an invalid iterator. Another small fix/step towards traversing the def-use chains in VPlan. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 10 +++++++++- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 4179832410238..95d5cfdcafbae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -884,10 +884,18 @@ void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, template void DomTreeBuilder::Calculate(VPDominatorTree &DT); void VPValue::replaceAllUsesWith(VPValue *New) { - for (VPUser *User : users()) + for (unsigned J = 0; J < getNumUsers();) { + VPUser *User = Users[J]; + unsigned NumUsers = getNumUsers(); for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) if (User->getOperand(I) == this) User->setOperand(I, New); + // If a user got removed after updating the current user, the next user to + // update will be moved to the current position, so we only need to + // increment the index if the number of users did not change. + if (NumUsers == getNumUsers()) + J++; + } } void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index a64f9e374ebff..926473af7d460 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -167,7 +167,13 @@ TEST(VPInstructionTest, replaceAllUsesWith) { EXPECT_EQ(0u, VPV2->getNumUsers()); EXPECT_EQ(0u, VPV3->getNumUsers()); + VPInstruction *I2 = new VPInstruction(0, {VPV1, VPV2}); + EXPECT_EQ(3u, VPV1->getNumUsers()); + VPV1->replaceAllUsesWith(VPV3); + EXPECT_EQ(3u, VPV3->getNumUsers()); + delete I1; + delete I2; delete VPV1; delete VPV2; delete VPV3; From 357bbaab666b212c5bfb65df80e76aace5367eff Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Oct 2020 19:01:49 +0100 Subject: [PATCH 502/544] [VPlan] Add VPRecipeBase::toVPUser helper (NFC). This adds a helper to convert a VPRecipeBase pointer to a VPUser, for recipes that inherit from VPUser. Once VPRecipeBase directly inherits from VPUser this helper can be removed. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 24 +++++++++++++++++++ llvm/lib/Transforms/Vectorize/VPlan.h | 7 +++++- .../Transforms/Vectorize/VPlanTest.cpp | 17 +++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 95d5cfdcafbae..cb5a43272e54b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -77,6 +77,30 @@ void VPRecipeBase::dump() const { dbgs() << "\n"; } +VPUser *VPRecipeBase::toVPUser() { + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + if (auto *U = dyn_cast(this)) + return U; + return nullptr; +} + // Get the top-most entry block of \p Start. This is the entry block of the // containing VPlan. This function is templated to support both const and non-const blocks template static T *getPlanEntry(T *Start) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 416a79eacfa79..fae73fdc57820 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -676,10 +676,15 @@ class VPRecipeBase : public ilist_node_with_parent { /// /// \returns an iterator pointing to the element after the erased one iplist::iterator eraseFromParent(); + + /// Returns a pointer to a VPUser, if the recipe inherits from VPUser or + /// nullptr otherwise. + VPUser *toVPUser(); }; inline bool VPUser::classof(const VPRecipeBase *Recipe) { - return Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSC || + return Recipe->getVPRecipeID() == VPRecipeBase::VPInstructionSC || + Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSC || Recipe->getVPRecipeID() == VPRecipeBase::VPWidenCallSC || Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC || Recipe->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC || diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 926473af7d460..39727ca01d84a 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -339,6 +339,16 @@ compound=true } } +TEST(VPRecipeTest, CastVPInstructionToVPUser) { + VPValue Op1; + VPValue Op2; + VPInstruction Recipe(Instruction::Add, {&Op1, &Op2}); + EXPECT_TRUE(isa(&Recipe)); + VPRecipeBase *BaseR = &Recipe; + EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&Recipe, BaseR->toVPUser()); +} + TEST(VPRecipeTest, CastVPWidenRecipeToVPUser) { LLVMContext C; @@ -354,6 +364,7 @@ TEST(VPRecipeTest, CastVPWidenRecipeToVPUser) { EXPECT_TRUE(isa(&WidenR)); VPRecipeBase *WidenRBase = &WidenR; EXPECT_TRUE(isa(WidenRBase)); + EXPECT_EQ(&WidenR, WidenRBase->toVPUser()); delete AI; } @@ -372,6 +383,7 @@ TEST(VPRecipeTest, CastVPWidenCallRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&Recipe, BaseR->toVPUser()); delete Call; } @@ -394,6 +406,7 @@ TEST(VPRecipeTest, CastVPWidenSelectRecipeToVPUser) { EXPECT_TRUE(isa(&WidenSelectR)); VPRecipeBase *BaseR = &WidenSelectR; EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&WidenSelectR, BaseR->toVPUser()); delete SelectI; } @@ -413,6 +426,7 @@ TEST(VPRecipeTest, CastVPWidenGEPRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&Recipe, BaseR->toVPUser()); delete GEP; } @@ -442,6 +456,7 @@ TEST(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&Recipe, BaseR->toVPUser()); } TEST(VPRecipeTest, CastVPReplicateRecipeToVPUser) { @@ -468,6 +483,7 @@ TEST(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&Recipe, BaseR->toVPUser()); } TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUser) { @@ -483,6 +499,7 @@ TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUser) { EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); + EXPECT_EQ(&Recipe, BaseR->toVPUser()); delete Load; } From 6c6cd5f8a9750865800ce26bdeacd84533335db3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 12:09:21 -0700 Subject: [PATCH 503/544] [X86] Consolidate wide Key Locker intrinsics into the same header as the other Key Locker intrinsics. --- clang/lib/Headers/CMakeLists.txt | 1 - clang/lib/Headers/immintrin.h | 7 +- clang/lib/Headers/keylocker_wide_intrin.h | 259 ---------------------- clang/lib/Headers/keylockerintrin.h | 239 ++++++++++++++++++++ 4 files changed, 240 insertions(+), 266 deletions(-) delete mode 100644 clang/lib/Headers/keylocker_wide_intrin.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 8c12d5ab935d8..95047e7069e7b 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -73,7 +73,6 @@ set(files invpcidintrin.h iso646.h keylockerintrin.h - keylocker_wide_intrin.h limits.h lwpintrin.h lzcntintrin.h diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 1beade1be2484..8fb5447a5919c 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -472,15 +472,10 @@ _storebe_i64(void * __P, long long __D) { #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ - defined(__KL__) + defined(__KL__) || defined(__WIDEKL__) #include #endif -#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ - defined(__WIDEKL__) -#include -#endif - #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__) #include diff --git a/clang/lib/Headers/keylocker_wide_intrin.h b/clang/lib/Headers/keylocker_wide_intrin.h deleted file mode 100644 index 9b6c9ccab811f..0000000000000 --- a/clang/lib/Headers/keylocker_wide_intrin.h +++ /dev/null @@ -1,259 +0,0 @@ -/*===-------------- keylocker_wide_intrin.h - KL_WIDE Intrinsics ------------=== - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - *===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error "Never use directly; include instead." -#endif - -#ifndef _KEYLOCKERINTRIN_WIDE_H -#define _KEYLOCKERINTRIN_WIDE_H - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ - __min_vector_width__(128))) - -/// Encrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle -/// at __h and store each resultant block back from __odata to __odata+7. And -/// return the affected ZF flag status. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the AESENCWIDE128KL instructions. -/// -/// \operation -/// Handle := MEM[__h+383:__h] -/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || -/// (Handle[127:0] AND (CPL > 0)) || -/// Handle[255:128] || -/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) -/// IF (IllegalHandle) -/// ZF := 1 -/// ELSE -/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) -/// IF Authentic == 0 -/// ZF := 1 -/// ELSE -/// FOR i := 0 to 7 -/// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) -/// ENDFOR -/// ZF := 0 -/// FI -/// FI -/// dst := ZF -/// OF := 0 -/// SF := 0 -/// AF := 0 -/// PF := 0 -/// CF := 0 -/// \endoperation -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesencwide128kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); -} - -/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle -/// at __h and store each resultant block back from __odata to __odata+7. And -/// return the affected ZF flag status. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the AESENCWIDE256KL instructions. -/// -/// \operation -/// Handle[511:0] := MEM[__h+511:__h] -/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || -/// (Handle[127:0] AND (CPL > 0)) || -/// Handle[255:128] || -/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) -/// IF (IllegalHandle) -/// ZF := 1 -/// ELSE -/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) -/// IF Authentic == 0 -/// ZF := 1 -/// ELSE -/// FOR i := 0 to 7 -/// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) -/// ENDFOR -/// ZF := 0 -/// FI -/// FI -/// dst := ZF -/// OF := 0 -/// SF := 0 -/// AF := 0 -/// PF := 0 -/// CF := 0 -/// \endoperation -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesencwide256kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); -} - -/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle -/// at __h and store each resultant block back from __odata to __odata+7. And -/// return the affected ZF flag status. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the AESDECWIDE128KL instructions. -/// -/// \operation -/// Handle[383:0] := MEM[__h+383:__h] -/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || -/// (Handle[127:0] AND (CPL > 0)) || -/// Handle[255:128] || -/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) -/// IF (IllegalHandle) -/// ZF := 1 -/// ELSE -/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) -/// IF Authentic == 0 -/// ZF := 1 -/// ELSE -/// FOR i := 0 to 7 -/// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) -/// ENDFOR -/// ZF := 0 -/// FI -/// FI -/// dst := ZF -/// OF := 0 -/// SF := 0 -/// AF := 0 -/// PF := 0 -/// CF := 0 -/// \endoperation -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesdecwide128kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); -} - -/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle -/// at __h and store each resultant block back from __odata to __odata+7. And -/// return the affected ZF flag status. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the AESDECWIDE256KL instructions. -/// -/// \operation -/// Handle[511:0] := MEM[__h+511:__h] -/// IllegalHandle = ( HandleReservedBitSet (Handle[511:0]) || -/// (Handle[127:0] AND (CPL > 0)) || -/// Handle[255:128] || -/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) -/// If (IllegalHandle) -/// ZF := 1 -/// ELSE -/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) -/// IF Authentic == 0 -/// ZF := 1 -/// ELSE -/// FOR i := 0 to 7 -/// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) -/// ENDFOR -/// ZF := 0 -/// FI -/// FI -/// dst := ZF -/// OF := 0 -/// SF := 0 -/// AF := 0 -/// PF := 0 -/// CF := 0 -/// \endoperation -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesdecwide256kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); -} - - -#undef __DEFAULT_FN_ATTRS - -#endif /* _KEYLOCKERINTRIN_WIDE_H */ diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index 5bd4fe59c6be0..2d6a1ca5851fb 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -28,6 +28,9 @@ #ifndef _KEYLOCKERINTRIN_H #define _KEYLOCKERINTRIN_H +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__KL__) + /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ @@ -340,4 +343,240 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { #undef __DEFAULT_FN_ATTRS +#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ + || defined(__KL__) */ + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__WIDEKL__) + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ + __min_vector_width__(128))) + +/// Encrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENCWIDE128KL instructions. +/// +/// \operation +/// Handle := MEM[__h+383:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesencwide128kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENCWIDE256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesencwide256kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDECWIDE128KL instructions. +/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesdecwide128kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDECWIDE256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle = ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) +/// If (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesdecwide256kl(__h, + __odata, + __odata + 1, + __odata + 2, + __odata + 3, + __odata + 4, + __odata + 5, + __odata + 6, + __odata + 7, + __idata[0], + __idata[1], + __idata[2], + __idata[3], + __idata[4], + __idata[5], + __idata[6], + __idata[7]); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ + || defined(__WIDEKL__) */ + #endif /* _KEYLOCKERINTRIN_H */ From 28595cbbeb2cc75584410b8b974f67ec99a853f2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 12:09:29 -0700 Subject: [PATCH 504/544] [X86] Synchronize the loadiwkey builtin operand order with gcc version. --- clang/include/clang/Basic/BuiltinsX86.def | 2 +- clang/lib/Headers/keylockerintrin.h | 2 +- llvm/include/llvm/IR/IntrinsicsX86.td | 2 +- llvm/lib/Target/X86/X86InstrKL.td | 2 +- llvm/test/CodeGen/X86/keylocker-intrinsics.ll | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index e212d0a2a0cca..1fbc950998a1c 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1901,7 +1901,7 @@ TARGET_BUILTIN(__builtin_ia32_enqcmd, "Ucv*vC*", "n", "enqcmd") TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") // KEY LOCKER -TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vUiV2OiV2OiV2Oi", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_encodekey128, "UiUiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_encodekey256, diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index 2d6a1ca5851fb..718771c869cc4 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -95,7 +95,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_loadiwkey (unsigned int __ctl, __m128i __intkey, __m128i __enkey_lo, __m128i __enkey_hi) { - __builtin_ia32_loadiwkey (__ctl, __intkey, __enkey_lo, __enkey_hi); + __builtin_ia32_loadiwkey (__intkey, __enkey_lo, __enkey_hi, __ctl); } /// Wrap a 128-bit AES key from __key into a key handle and output in diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5708a761919f5..8546dc311114b 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4953,7 +4953,7 @@ let TargetPrefix = "x86" in { // Key Locker let TargetPrefix = "x86" in { def int_x86_loadiwkey : GCCBuiltin<"__builtin_ia32_loadiwkey">, - Intrinsic<[], [llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + Intrinsic<[], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], []>; def int_x86_encodekey128 : Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td index aa7df4256cec5..7a7e6467ae976 100644 --- a/llvm/lib/Target/X86/X86InstrKL.td +++ b/llvm/lib/Target/X86/X86InstrKL.td @@ -20,7 +20,7 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in { let Uses = [XMM0, EAX], Defs = [EFLAGS] in { def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "loadiwkey\t{$src2, $src1|$src1, $src2}", - [(int_x86_loadiwkey EAX, XMM0, VR128:$src1, VR128:$src2)]>, T8XS; + [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8XS; } let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in { diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll index 584391f2eafdb..2f9797e437b7d 100644 --- a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unkown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unkown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32 -declare void @llvm.x86.loadiwkey(i32, <2 x i64>, <2 x i64>, <2 x i64>) +declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>) declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*) @@ -29,7 +29,7 @@ define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 ; X32-NEXT: loadiwkey %xmm2, %xmm1 ; X32-NEXT: retl entry: - tail call void @llvm.x86.loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) + tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl) ret void } From 230c57b0bd8321085a5e0339baf37b509d5c76f6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 12:09:35 -0700 Subject: [PATCH 505/544] [X86] Synchronize the encodekey builtins with gcc. Don't assume void* is 16 byte aligned. We were taking multiple pointer arguments in the builtin. gcc accepts a single void*. The cast from void* to _m128i* caused the IR generation to assume the pointer was aligned. Instead make the builtin take a single void*, emit i8* GEPs to adjust then cast to <2 x i64>* and perform a store with align of 1. --- clang/include/clang/Basic/BuiltinsX86.def | 8 +- clang/lib/CodeGen/CGBuiltin.cpp | 45 +++++--- clang/lib/Headers/keylockerintrin.h | 22 +--- clang/test/CodeGen/X86/keylocker.c | 52 +++++++++ .../X86/keylocker-intrinsics-fast-isel.ll | 104 ++++++++++++++++++ 5 files changed, 194 insertions(+), 37 deletions(-) create mode 100644 llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 1fbc950998a1c..c33026139b3cf 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1902,10 +1902,10 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") // KEY LOCKER TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey128, - "UiUiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey256, - "UiUiV2OiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, + "UiUiV2Oiv*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, + "UiUiV2OiV2Oiv*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e5f6ee138a21e..d3603579844dc 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14039,8 +14039,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_psubusb128: case X86::BI__builtin_ia32_psubusw128: return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); - case X86::BI__builtin_ia32_encodekey128: - case X86::BI__builtin_ia32_encodekey256: + case X86::BI__builtin_ia32_encodekey128_u32: { + Intrinsic::ID IID = Intrinsic::x86_encodekey128; + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); + + for (int i = 0; i < 6; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[2], i * 16); + Ptr = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Extract->getType())); + Builder.CreateAlignedStore(Extract, Ptr, Align(1)); + } + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_encodekey256_u32: { + Intrinsic::ID IID = Intrinsic::x86_encodekey256; + + Value *Call = + Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); + + for (int i = 0; i < 7; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[3], i * 16); + Ptr = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Extract->getType())); + Builder.CreateAlignedStore(Extract, Ptr, Align(1)); + } + + return Builder.CreateExtractValue(Call, 0); + } case X86::BI__builtin_ia32_aesenc128kl: case X86::BI__builtin_ia32_aesdec128kl: case X86::BI__builtin_ia32_aesenc256kl: @@ -14056,18 +14085,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_encodekey128: - ID = Intrinsic::x86_encodekey128; - InOps = {Ops[0], Ops[1]}; - FirstReturnOp = 2; - ResultCount = 6; - break; - case X86::BI__builtin_ia32_encodekey256: - ID = Intrinsic::x86_encodekey256; - InOps = {Ops[0], Ops[1], Ops[2]}; - FirstReturnOp = 3; - ResultCount = 7; - break; case X86::BI__builtin_ia32_aesenc128kl: case X86::BI__builtin_ia32_aesdec128kl: case X86::BI__builtin_ia32_aesenc256kl: diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index 718771c869cc4..c31ba16122a5b 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -132,15 +132,7 @@ _mm_loadiwkey (unsigned int __ctl, __m128i __intkey, /// \endoperation static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { - __m128i *__results = (__m128i*)__h; - - return __builtin_ia32_encodekey128(__htype, __key, - __results, - __results + 1, - __results + 2, - __results + 3, - __results + 4, - __results + 5); + return __builtin_ia32_encodekey128_u32(__htype, (__v2di)__key, __h); } /// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then @@ -181,16 +173,8 @@ _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, void *__h) { - __m128i *__results = (__m128i*)__h; - - return __builtin_ia32_encodekey256(__htype, __key_lo, __key_hi, - __results, - __results + 1, - __results + 2, - __results + 3, - __results + 4, - __results + 5, - __results + 6); + return __builtin_ia32_encodekey256_u32(__htype, (__v2di)__key_lo, + (__v2di)__key_hi, __h); } /// The AESENC128KL performs 10 rounds of AES to encrypt the __idata using diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c index 835bdd279ef1f..b410d53b4b83c 100644 --- a/clang/test/CodeGen/X86/keylocker.c +++ b/clang/test/CodeGen/X86/keylocker.c @@ -14,12 +14,64 @@ void test_loadiwkey(unsigned int ctl, __m128i intkey, __m128i enkey_lo, __m128i unsigned int test_encodekey128_u32(unsigned int htype, __m128i key, void *h) { //CHECK-LABEL: @test_encodekey128_u32 //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: itcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 16 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 32 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 48 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 64 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 80 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_encodekey128_u32(htype, key, h); } unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i key_hi, void *h) { //CHECK-LABEL: @test_encodekey256_u32 //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: itcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 16 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 32 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 48 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 64 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 80 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 96 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_encodekey256_u32(htype, key_lo, key_hi, h); } diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll new file mode 100644 index 0000000000000..b5518ec44dc22 --- /dev/null +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s + +; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker-builtins.c + +define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) { +; CHECK-LABEL: test_loadiwkey: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: loadiwkey %xmm2, %xmm1 +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl) + ret void +} + +define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, i8* nocapture %h) { +; CHECK-LABEL: test_encodekey128_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: encodekey128 %edi, %eax +; CHECK-NEXT: movups %xmm0, (%rsi) +; CHECK-NEXT: movups %xmm1, 16(%rsi) +; CHECK-NEXT: movups %xmm2, 32(%rsi) +; CHECK-NEXT: movups %xmm4, 48(%rsi) +; CHECK-NEXT: movups %xmm5, 64(%rsi) +; CHECK-NEXT: movups %xmm6, 80(%rsi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key) + %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + %2 = bitcast i8* %h to <2 x i64>* + store <2 x i64> %1, <2 x i64>* %2, align 1 + %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + %4 = getelementptr i8, i8* %h, i64 16 + %5 = bitcast i8* %4 to <2 x i64>* + store <2 x i64> %3, <2 x i64>* %5, align 1 + %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + %7 = getelementptr i8, i8* %h, i64 32 + %8 = bitcast i8* %7 to <2 x i64>* + store <2 x i64> %6, <2 x i64>* %8, align 1 + %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + %10 = getelementptr i8, i8* %h, i64 48 + %11 = bitcast i8* %10 to <2 x i64>* + store <2 x i64> %9, <2 x i64>* %11, align 1 + %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + %13 = getelementptr i8, i8* %h, i64 64 + %14 = bitcast i8* %13 to <2 x i64>* + store <2 x i64> %12, <2 x i64>* %14, align 1 + %15 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + %16 = getelementptr i8, i8* %h, i64 80 + %17 = bitcast i8* %16 to <2 x i64>* + store <2 x i64> %15, <2 x i64>* %17, align 1 + %18 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %18 +} + +define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, i8* nocapture %h) { +; CHECK-LABEL: test_encodekey256_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: encodekey256 %edi, %eax +; CHECK-NEXT: movups %xmm0, (%rsi) +; CHECK-NEXT: movups %xmm1, 16(%rsi) +; CHECK-NEXT: movups %xmm2, 32(%rsi) +; CHECK-NEXT: movups %xmm3, 48(%rsi) +; CHECK-NEXT: movups %xmm4, 64(%rsi) +; CHECK-NEXT: movups %xmm5, 80(%rsi) +; CHECK-NEXT: movups %xmm6, 96(%rsi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi) + %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + %2 = bitcast i8* %h to <2 x i64>* + store <2 x i64> %1, <2 x i64>* %2, align 1 + %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + %4 = getelementptr i8, i8* %h, i64 16 + %5 = bitcast i8* %4 to <2 x i64>* + store <2 x i64> %3, <2 x i64>* %5, align 1 + %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + %7 = getelementptr i8, i8* %h, i64 32 + %8 = bitcast i8* %7 to <2 x i64>* + store <2 x i64> %6, <2 x i64>* %8, align 1 + %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + %10 = getelementptr i8, i8* %h, i64 48 + %11 = bitcast i8* %10 to <2 x i64>* + store <2 x i64> %9, <2 x i64>* %11, align 1 + %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + %13 = getelementptr i8, i8* %h, i64 64 + %14 = bitcast i8* %13 to <2 x i64>* + store <2 x i64> %12, <2 x i64>* %14, align 1 + %15 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + %16 = getelementptr i8, i8* %h, i64 80 + %17 = bitcast i8* %16 to <2 x i64>* + store <2 x i64> %15, <2 x i64>* %17, align 1 + %18 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + %19 = getelementptr i8, i8* %h, i64 96 + %20 = bitcast i8* %19 to <2 x i64>* + store <2 x i64> %18, <2 x i64>* %20, align 1 + %21 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %21 +} + +declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32) +declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) +declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>) From a02b449bb1556fe0f17b86eaa69f6bcda945d123 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 12:09:41 -0700 Subject: [PATCH 506/544] [X86] Sync AESENC/DEC Key Locker builtins with gcc. For the wide builtins, pass a single input and output pointer to the builtins. Emit the GEPs and input loads from CGBuiltin. --- clang/include/clang/Basic/BuiltinsX86.def | 26 +- clang/lib/CodeGen/CGBuiltin.cpp | 114 +++--- clang/lib/Headers/keylockerintrin.h | 84 +---- clang/test/CodeGen/X86/keylocker.c | 176 ++++++++- .../X86/keylocker-intrinsics-fast-isel.ll | 340 ++++++++++++++++++ 5 files changed, 587 insertions(+), 153 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index c33026139b3cf..8f9cfe4b6dc56 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1902,22 +1902,16 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") // KEY LOCKER TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, - "UiUiV2Oiv*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, - "UiUiV2OiV2Oiv*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesdec256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesencwide128kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") -TARGET_BUILTIN(__builtin_ia32_aesencwide256kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") -TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") -TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, "UiUiV2Oiv*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, "UiUiV2OiV2Oiv*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesenc128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesenc256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesdec128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesdec256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesencwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesencwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") // SERIALIZE TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d3603579844dc..dc3cafa5d062c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14070,75 +14070,67 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractValue(Call, 0); } - case X86::BI__builtin_ia32_aesenc128kl: - case X86::BI__builtin_ia32_aesdec128kl: - case X86::BI__builtin_ia32_aesenc256kl: - case X86::BI__builtin_ia32_aesdec256kl: - case X86::BI__builtin_ia32_aesencwide128kl: - case X86::BI__builtin_ia32_aesdecwide128kl: - case X86::BI__builtin_ia32_aesencwide256kl: - case X86::BI__builtin_ia32_aesdecwide256kl: { - int FirstReturnOp; - int ResultCount; - SmallVector InOps; - unsigned ID; - + case X86::BI__builtin_ia32_aesenc128kl_u8: + case X86::BI__builtin_ia32_aesdec128kl_u8: + case X86::BI__builtin_ia32_aesenc256kl_u8: + case X86::BI__builtin_ia32_aesdec256kl_u8: { + Intrinsic::ID IID; switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_aesenc128kl: - case X86::BI__builtin_ia32_aesdec128kl: - case X86::BI__builtin_ia32_aesenc256kl: - case X86::BI__builtin_ia32_aesdec256kl: { - InOps = {Ops[1], Ops[2]}; - FirstReturnOp = 0; - ResultCount = 1; - switch (BuiltinID) { - case X86::BI__builtin_ia32_aesenc128kl: - ID = Intrinsic::x86_aesenc128kl; - break; - case X86::BI__builtin_ia32_aesdec128kl: - ID = Intrinsic::x86_aesdec128kl; - break; - case X86::BI__builtin_ia32_aesenc256kl: - ID = Intrinsic::x86_aesenc256kl; - break; - case X86::BI__builtin_ia32_aesdec256kl: - ID = Intrinsic::x86_aesdec256kl; - break; - } + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_aesenc128kl_u8: + IID = Intrinsic::x86_aesenc128kl; + break; + case X86::BI__builtin_ia32_aesdec128kl_u8: + IID = Intrinsic::x86_aesdec128kl; + break; + case X86::BI__builtin_ia32_aesenc256kl_u8: + IID = Intrinsic::x86_aesenc256kl; + break; + case X86::BI__builtin_ia32_aesdec256kl_u8: + IID = Intrinsic::x86_aesdec256kl; break; } - case X86::BI__builtin_ia32_aesencwide128kl: - case X86::BI__builtin_ia32_aesdecwide128kl: - case X86::BI__builtin_ia32_aesencwide256kl: - case X86::BI__builtin_ia32_aesdecwide256kl: { - InOps = {Ops[0], Ops[9], Ops[10], Ops[11], Ops[12], Ops[13], - Ops[14], Ops[15], Ops[16]}; - FirstReturnOp = 1; - ResultCount = 8; - switch (BuiltinID) { - case X86::BI__builtin_ia32_aesencwide128kl: - ID = Intrinsic::x86_aesencwide128kl; - break; - case X86::BI__builtin_ia32_aesdecwide128kl: - ID = Intrinsic::x86_aesdecwide128kl; - break; - case X86::BI__builtin_ia32_aesencwide256kl: - ID = Intrinsic::x86_aesencwide256kl; - break; - case X86::BI__builtin_ia32_aesdecwide256kl: - ID = Intrinsic::x86_aesdecwide256kl; - break; - } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); + + Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[0]); + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_aesencwide128kl_u8: + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + case X86::BI__builtin_ia32_aesencwide256kl_u8: + case X86::BI__builtin_ia32_aesdecwide256kl_u8: { + Intrinsic::ID IID; + switch (BuiltinID) { + case X86::BI__builtin_ia32_aesencwide128kl_u8: + IID = Intrinsic::x86_aesencwide128kl; + break; + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + IID = Intrinsic::x86_aesdecwide128kl; + break; + case X86::BI__builtin_ia32_aesencwide256kl_u8: + IID = Intrinsic::x86_aesencwide256kl; + break; + case X86::BI__builtin_ia32_aesdecwide256kl_u8: + IID = Intrinsic::x86_aesdecwide256kl; break; } + + Value *InOps[9]; + InOps[0] = Ops[2]; + for (int i = 0; i != 8; ++i) { + Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i); + InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16)); } - Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), InOps); + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); - for (int i = 0; i < ResultCount; ++i) { - Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, i + 1), - Ops[FirstReturnOp + i]); + for (int i = 0; i != 8; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i); + Builder.CreateAlignedStore(Extract, Ptr, Align(16)); } return Builder.CreateExtractValue(Call, 0); diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index c31ba16122a5b..c15d39c8e3928 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -211,7 +211,7 @@ _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesenc128kl(__odata, __idata, __h); + return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using @@ -248,7 +248,7 @@ _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesenc256kl(__odata, __idata, __h); + return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using @@ -285,7 +285,7 @@ _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesdec128kl(__odata, __idata, __h); + return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC256KL performs 10 rounds of AES to decrypt the __idata using @@ -322,7 +322,7 @@ _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesdec256kl(__odata, __idata, __h); + return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } #undef __DEFAULT_FN_ATTRS @@ -374,23 +374,8 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesencwide128kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } /// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle @@ -429,23 +414,8 @@ _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesencwide256kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle @@ -484,23 +454,8 @@ _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesdecwide128kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle @@ -539,23 +494,8 @@ _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesdecwide256kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c index b410d53b4b83c..b87fe22d77617 100644 --- a/clang/test/CodeGen/X86/keylocker.c +++ b/clang/test/CodeGen/X86/keylocker.c @@ -78,47 +78,215 @@ unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i k unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesenc256kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesenc256kl_u8(odata, idata, h); } unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesdec256kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesdec256kl_u8(odata, idata, h); } unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesenc128kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesenc128kl_u8(odata, idata, h); } unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesdec128kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesdec128kl_u8(odata, idata, h); } unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesencwide128kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesencwide128kl_u8(odata, idata, h); } unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesdecwide128kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesdecwide128kl_u8(odata, idata, h); } unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesencwide256kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesencwide256kl_u8(odata, idata, h); } unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesdecwide256kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesdecwide256kl_u8(odata, idata, h); } diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll index b5518ec44dc22..a2443ffbc4e65 100644 --- a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll @@ -99,6 +99,346 @@ entry: ret i32 %21 } +define zeroext i8 @test_mm_aesenc256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesenc256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesenc256kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test_mm_aesdec256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesdec256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdec256kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test_mm_aesenc128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesenc128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesenc128kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test_mm_aesdec128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesdec128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdec128kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test__mm_aesencwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesencwide128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesencwide128kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + +define zeroext i8 @test__mm_aesdecwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesdecwide128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdecwide128kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + +define zeroext i8 @test__mm_aesencwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesencwide256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesencwide256kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + +define zeroext i8 @test__mm_aesdecwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesdecwide256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdecwide256kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) From 955b926b0b37ee3f56d32a90702cea9878eb9c78 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sun, 4 Oct 2020 19:10:39 +0000 Subject: [PATCH 507/544] [gn build] Port 6c6cd5f8a97 --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 811faf52b1831..58fe63d10294c 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -143,7 +143,6 @@ copy("Headers") { "inttypes.h", "invpcidintrin.h", "iso646.h", - "keylocker_wide_intrin.h", "keylockerintrin.h", "limits.h", "lwpintrin.h", From c36d441b6b64e412a975adb1401657870588563c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 4 Oct 2020 12:06:06 -0700 Subject: [PATCH 508/544] [SDA] Fix -Wunused-function in -DLLVM_ENABLE_ASSERTIONS=off builds --- llvm/lib/Analysis/SyncDependenceAnalysis.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index 0771bb52c4f47..67a1365b698da 100644 --- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -417,6 +417,7 @@ struct DivergencePropagator { } }; +#ifndef NDEBUG static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) { Out << "["; bool First = true; @@ -428,6 +429,7 @@ static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) { } Out << "]"; } +#endif const ControlDivergenceDesc & SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) { From b4288f278a254e597295001f7e544b680b54e61a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 4 Oct 2020 22:30:55 +0300 Subject: [PATCH 509/544] [X86] Remove an accidentally added file. NFC. This file seems to have been accidentally added as part of commit 413577a8790407d75ba834fa5668c2632fe1851e. --- llvm/lib/Target/X86/X86InstrInfo.td.rej | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 llvm/lib/Target/X86/X86InstrInfo.td.rej diff --git a/llvm/lib/Target/X86/X86InstrInfo.td.rej b/llvm/lib/Target/X86/X86InstrInfo.td.rej deleted file mode 100644 index 5c0a632b55a70..0000000000000 --- a/llvm/lib/Target/X86/X86InstrInfo.td.rej +++ /dev/null @@ -1,11 +0,0 @@ -diff a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td (rejected hunks) -@@ -3092,6 +3094,9 @@ include "X86InstrSVM.td" - include "X86InstrTSX.td" - include "X86InstrSGX.td" - -+// Key Locker instructions -+include "X86InstrKL.td" -+ - // AMX instructions - include "X86InstrAMX.td" - From 22664a325167086260aa2d1ff629686020f2b27d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 4 Oct 2020 21:46:50 +0200 Subject: [PATCH 510/544] [MemCpyOpt] Don't use array allocas in tests (NFC) Apparently querying dereferenceability of array allocations is being intentionally penalized (https://reviews.llvm.org/D41398), so avoid using them in tests. --- llvm/test/Transforms/MemCpyOpt/callslot.ll | 116 ++++++++++++--------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index 4b65fbcf88c23..5268dd9c8da04 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -3,86 +3,106 @@ define i8 @read_dest_between_call_and_memcpy() { ; CHECK-LABEL: @read_dest_between_call_and_memcpy( -; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: store i8 1, i8* [[DEST]], align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[DEST]], align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: store i8 1, i8* [[DEST_I8]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[DEST_I8]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8]], i8 0, i64 16, i1 false) ; CHECK-NEXT: ret i8 [[X]] ; - %dest = alloca i8, i64 16 - %src = alloca i8, i64 16 - store i8 1, i8* %dest - call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) - %x = load i8, i8* %dest - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + store i8 1, i8* %dest.i8 + call void @llvm.memset.p0i8.i64(i8* %src.i8, i8 0, i64 16, i1 false) + %x = load i8, i8* %dest.i8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) ret i8 %x } define i8 @read_src_between_call_and_memcpy() { ; CHECK-LABEL: @read_src_between_call_and_memcpy( -; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[SRC]], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i1 false) +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[SRC_I8]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 16, i1 false) ; CHECK-NEXT: ret i8 [[X]] ; - %dest = alloca i8, i64 16 - %src = alloca i8, i64 16 - call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) - %x = load i8, i8* %src - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @llvm.memset.p0i8.i64(i8* %src.i8, i8 0, i64 16, i1 false) + %x = load i8, i8* %src.i8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) ret i8 %x } define void @write_dest_between_call_and_memcpy() { ; CHECK-LABEL: @write_dest_between_call_and_memcpy( -; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) -; CHECK-NEXT: store i8 1, i8* [[DEST]], align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, i8* [[DEST_I8]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8]], i8 0, i64 16, i1 false) ; CHECK-NEXT: ret void ; - %dest = alloca i8, i64 16 - %src = alloca i8, i64 16 - call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) - store i8 1, i8* %dest - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @llvm.memset.p0i8.i64(i8* %src.i8, i8 0, i64 16, i1 false) + store i8 1, i8* %dest.i8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) ret void } define void @write_src_between_call_and_memcpy() { ; CHECK-LABEL: @write_src_between_call_and_memcpy( -; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC]], i8 0, i64 16, i1 false) -; CHECK-NEXT: store i8 1, i8* [[SRC]], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i1 false) +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, i8* [[SRC_I8]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 16, i1 false) ; CHECK-NEXT: ret void ; - %dest = alloca i8, i64 16 - %src = alloca i8, i64 16 - call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) - store i8 1, i8* %src - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @llvm.memset.p0i8.i64(i8* %src.i8, i8 0, i64 16, i1 false) + store i8 1, i8* %src.i8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) ret void } ; TODO: This is a miscompile. -define void @throw_between_call_and_mempy(i8* dereferenceable(16) %dest) { +define void @throw_between_call_and_mempy(i8* dereferenceable(16) %dest.i8) { ; CHECK-LABEL: @throw_between_call_and_mempy( -; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 16, align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8:%.*]] to [16 x i8]* +; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [16 x i8]* [[DEST_I81]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I812]], i8 0, i64 16, i1 false) ; CHECK-NEXT: call void @may_throw() [[ATTR2:#.*]] ; CHECK-NEXT: ret void ; - %src = alloca i8, i64 16 - call void @llvm.memset.p0i8.i64(i8* %src, i8 0, i64 16, i1 false) + %src = alloca [16 x i8] + %src.i8 = bitcast [16 x i8]* %src to i8* + call void @llvm.memset.p0i8.i64(i8* %src.i8, i8 0, i64 16, i1 false) call void @may_throw() readnone - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) ret void } From f05173d0bfc268530afd9fb9622caf7f1db5f8d6 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 3 Oct 2020 03:30:21 +0000 Subject: [PATCH 511/544] Implement callee/caller type checking for llvm.call This aligns the behavior with the standard call as well as the LLVM verifier. Reviewed By: ftynse, dcaballe Differential Revision: https://reviews.llvm.org/D88362 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 6 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 76 ++++++++++++++++++++- mlir/test/Dialect/LLVMIR/invalid.mlir | 69 +++++++++++++++++++ 3 files changed, 145 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index b5b8e45eb21f2..94f6809349f19 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -417,11 +417,7 @@ def LLVM_CallOp : LLVM_Op<"call">, $_state.addAttributes(attributes); $_state.addOperands(operands); }]>]; - let verifier = [{ - if (getNumResults() > 1) - return emitOpError("must have 0 or 1 result"); - return success(); - }]; + let verifier = [{ return ::verify(*this); }]; let parser = [{ return parseCallOp(parser, result); }]; let printer = [{ printCallOp(p, *this); }]; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index e1f7aede782df..8da2e0b363003 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -531,9 +531,83 @@ static ParseResult parseLandingpadOp(OpAsmParser &parser, } //===----------------------------------------------------------------------===// -// Printing/parsing for LLVM::CallOp. +// Verifying/Printing/parsing for LLVM::CallOp. //===----------------------------------------------------------------------===// +static LogicalResult verify(CallOp &op) { + if (op.getNumResults() > 1) + return op.emitOpError("must have 0 or 1 result"); + + // Type for the callee, we'll get it differently depending if it is a direct + // or indirect call. + LLVMType fnType; + + bool isIndirect = false; + + // If this is an indirect call, the callee attribute is missing. + Optional calleeName = op.callee(); + if (!calleeName) { + isIndirect = true; + if (!op.getNumOperands()) + return op.emitOpError( + "must have either a `callee` attribute or at least an operand"); + fnType = op.getOperand(0).getType().dyn_cast(); + if (!fnType) + return op.emitOpError("indirect call to a non-llvm type: ") + << op.getOperand(0).getType(); + auto ptrType = fnType.dyn_cast(); + if (!ptrType) + return op.emitOpError("indirect call expects a pointer as callee: ") + << fnType; + fnType = ptrType.getElementType(); + } else { + Operation *callee = SymbolTable::lookupNearestSymbolFrom(op, *calleeName); + if (!callee) + return op.emitOpError() + << "'" << *calleeName + << "' does not reference a symbol in the current scope"; + auto fn = dyn_cast(callee); + if (!fn) + return op.emitOpError() << "'" << *calleeName + << "' does not reference a valid LLVM function"; + + fnType = fn.getType(); + } + if (!fnType.isFunctionTy()) + return op.emitOpError("callee does not have a functional type: ") << fnType; + + // Verify that the operand and result types match the callee. + + if (!fnType.isFunctionVarArg() && + fnType.getFunctionNumParams() != (op.getNumOperands() - isIndirect)) + return op.emitOpError() + << "incorrect number of operands (" + << (op.getNumOperands() - isIndirect) + << ") for callee (expecting: " << fnType.getFunctionNumParams() + << ")"; + + if (fnType.getFunctionNumParams() > (op.getNumOperands() - isIndirect)) + return op.emitOpError() << "incorrect number of operands (" + << (op.getNumOperands() - isIndirect) + << ") for varargs callee (expecting at least: " + << fnType.getFunctionNumParams() << ")"; + + for (unsigned i = 0, e = fnType.getFunctionNumParams(); i != e; ++i) + if (op.getOperand(i + isIndirect).getType() != + fnType.getFunctionParamType(i)) + return op.emitOpError() << "operand type mismatch for operand " << i + << ": " << op.getOperand(i + isIndirect).getType() + << " != " << fnType.getFunctionParamType(i); + + if (op.getNumResults() && + op.getResult(0).getType() != fnType.getFunctionResultType()) + return op.emitOpError() + << "result type mismatch: " << op.getResult(0).getType() + << " != " << fnType.getFunctionResultType(); + + return success(); +} + static void printCallOp(OpAsmPrinter &p, CallOp &op) { auto callee = op.callee(); bool isDirect = callee.hasValue(); diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index c19795e98b686..322d5397a4176 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -125,6 +125,75 @@ func @call_non_function_type(%callee : !llvm.func, %arg : !llvm.i8) { // ----- +func @invalid_call() { + // expected-error@+1 {{'llvm.call' op must have either a `callee` attribute or at least an operand}} + "llvm.call"() : () -> () +} + +// ----- + +func @call_non_function_type(%callee : !llvm.func, %arg : !llvm.i8) { + // expected-error@+1 {{expected function type}} + llvm.call %callee(%arg) : !llvm.func +} + +// ----- + +func @call_unknown_symbol() { + // expected-error@+1 {{'llvm.call' op 'missing_callee' does not reference a symbol in the current scope}} + llvm.call @missing_callee() : () -> () +} + +// ----- + +func @standard_func_callee() + +func @call_non_llvm() { + // expected-error@+1 {{'llvm.call' op 'standard_func_callee' does not reference a valid LLVM function}} + llvm.call @standard_func_callee() : () -> () +} + +// ----- + +func @call_non_llvm_indirect(%arg0 : i32) { + // expected-error@+1 {{'llvm.call' op operand #0 must be LLVM dialect type, but got 'i32'}} + "llvm.call"(%arg0) : (i32) -> () +} + +// ----- + +llvm.func @callee_func(!llvm.i8) -> () + +func @callee_arg_mismatch(%arg0 : !llvm.i32) { + // expected-error@+1 {{'llvm.call' op operand type mismatch for operand 0: '!llvm.i32' != '!llvm.i8'}} + llvm.call @callee_func(%arg0) : (!llvm.i32) -> () +} + +// ----- + +func @indirect_callee_arg_mismatch(%arg0 : !llvm.i32, %callee : !llvm.ptr>) { + // expected-error@+1 {{'llvm.call' op operand type mismatch for operand 0: '!llvm.i32' != '!llvm.i8'}} + "llvm.call"(%callee, %arg0) : (!llvm.ptr>, !llvm.i32) -> () +} + +// ----- + +llvm.func @callee_func() -> (!llvm.i8) + +func @callee_return_mismatch() { + // expected-error@+1 {{'llvm.call' op result type mismatch: '!llvm.i32' != '!llvm.i8'}} + %res = llvm.call @callee_func() : () -> (!llvm.i32) +} + +// ----- + +func @indirect_callee_return_mismatch(%callee : !llvm.ptr>) { + // expected-error@+1 {{'llvm.call' op result type mismatch: '!llvm.i32' != '!llvm.i8'}} + "llvm.call"(%callee) : (!llvm.ptr>) -> (!llvm.i32) +} + +// ----- + func @call_too_many_results(%callee : () -> (i32,i32)) { // expected-error@+1 {{expected function with 0 or 1 result}} llvm.call %callee() : () -> (i32, i32) From 8aaa73134954b5e5ad51631665fd188f38ded75e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 4 Oct 2020 22:25:28 +0200 Subject: [PATCH 512/544] [MemCpyOpt] Add tests for call slot optimization with GEPs (NFC) --- llvm/test/Transforms/MemCpyOpt/callslot.ll | 59 +++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index 5268dd9c8da04..a4cfd53f4d249 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -87,7 +87,6 @@ define void @write_src_between_call_and_memcpy() { ret void } -; TODO: This is a miscompile. define void @throw_between_call_and_mempy(i8* dereferenceable(16) %dest.i8) { ; CHECK-LABEL: @throw_between_call_and_mempy( ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 @@ -106,6 +105,64 @@ define void @throw_between_call_and_mempy(i8* dereferenceable(16) %dest.i8) { ret void } +define void @dest_is_gep_nounwind_call() { +; CHECK-LABEL: @dest_is_gep_nounwind_call( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* +; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 +; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3:#.*]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [8 x i8] + %src.i8 = bitcast [8 x i8]* %src to i8* + %dest.i8 = getelementptr [16 x i8], [16 x i8]* %dest, i64 0, i64 8 + call void @accept_ptr(i8* %src.i8) nounwind + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 8, i1 false) + ret void +} + +define void @dest_is_gep_may_throw_call() { +; CHECK-LABEL: @dest_is_gep_may_throw_call( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* +; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 +; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [8 x i8] + %src.i8 = bitcast [8 x i8]* %src to i8* + %dest.i8 = getelementptr [16 x i8], [16 x i8]* %dest, i64 0, i64 8 + call void @accept_ptr(i8* %src.i8) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 8, i1 false) + ret void +} + +define void @dest_is_gep_requires_movement() { +; CHECK-LABEL: @dest_is_gep_requires_movement( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3]] +; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [8 x i8] + %src.i8 = bitcast [8 x i8]* %src to i8* + call void @accept_ptr(i8* %src.i8) nounwind + %dest.i8 = getelementptr [16 x i8], [16 x i8]* %dest, i64 0, i64 8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 8, i1 false) + ret void +} + declare void @may_throw() +declare void @accept_ptr(i8*) declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) From 8036cf7f5402ea7fc8564a9a2beae512c324bf3d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 4 Oct 2020 12:28:37 -0700 Subject: [PATCH 513/544] llvm-dwarfdump: Skip tombstoned address ranges Make the dumper & API a bit more informative by using the new tombstone addresses to filter out or otherwise render more explicitly dead code ranges. --- .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 2 +- llvm/include/llvm/BinaryFormat/Dwarf.h | 7 + .../llvm/DebugInfo/DWARF/DWARFDebugRnglists.h | 1 + llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 44 +- .../DebugInfo/DWARF/DWARFDebugRangeList.cpp | 7 + .../DebugInfo/DWARF/DWARFDebugRnglists.cpp | 24 +- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 17 +- .../test/tools/llvm-dwarfdump/X86/tombstone.s | 545 ++++++++++++++++++ 8 files changed, 615 insertions(+), 32 deletions(-) create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/tombstone.s diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index dfa40759a7ff0..b70beb5019467 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -946,7 +946,7 @@ DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) { llvm::Expected llvm_ranges = range_list_or_error->getAbsoluteRanges( llvm::object::SectionedAddress{GetBaseAddress()}, - [&](uint32_t index) { + GetAddressByteSize(), [&](uint32_t index) { uint32_t index_size = GetAddressByteSize(); dw_offset_t addr_base = GetAddrBase(); lldb::offset_t offset = addr_base + index * index_size; diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 6558a74b64111..75b8b2647b95f 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -27,6 +27,8 @@ #include "llvm/Support/FormatVariadicDetails.h" #include "llvm/ADT/Triple.h" +#include + namespace llvm { class StringRef; @@ -745,6 +747,11 @@ template <> struct EnumTraits : public std::true_type { static constexpr char Type[3] = "OP"; static constexpr StringRef (*StringFn)(unsigned) = &OperationEncodingString; }; + +inline uint64_t computeTombstoneAddress(uint8_t AddressByteSize) { + return std::numeric_limits::max() >> (8 - AddressByteSize) * 8; +} + } // End of namespace dwarf /// Dwarf constants format_provider diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index 78a018ff482b6..4d28bdcde2e49 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -48,6 +48,7 @@ class DWARFDebugRnglist : public DWARFListType { /// Build a DWARFAddressRangesVector from a rangelist. DWARFAddressRangesVector getAbsoluteRanges(Optional BaseAddr, + uint8_t AddressByteSize, function_ref(uint32_t)> LookupPooledAddress) const; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 427d25f5011a2..ab3022955cdbd 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -783,6 +783,18 @@ Error DWARFDebugLine::LineTable::parse( *OS << '\n'; Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0); } + bool TombstonedAddress = false; + auto EmitRow = [&] { + if (!TombstonedAddress) { + if (Verbose) { + *OS << "\n"; + OS->indent(12); + } + if (OS) + State.Row.dump(*OS); + State.appendRowToMatrix(); + } + }; while (*OffsetPtr < EndOffset) { DataExtractor::Cursor Cursor(*OffsetPtr); @@ -834,13 +846,7 @@ Error DWARFDebugLine::LineTable::parse( // No need to test the Cursor is valid here, since it must be to get // into this code path - if it were invalid, the default case would be // followed. - if (Verbose) { - *OS << "\n"; - OS->indent(12); - } - if (OS) - State.Row.dump(*OS); - State.appendRowToMatrix(); + EmitRow(); State.resetRowAndSequence(); break; @@ -882,6 +888,10 @@ Error DWARFDebugLine::LineTable::parse( State.Row.Address.Address = TableData.getRelocatedAddress( Cursor, &State.Row.Address.SectionIndex); + uint64_t Tombstone = + dwarf::computeTombstoneAddress(OpcodeAddressSize); + TombstonedAddress = State.Row.Address.Address == Tombstone; + // Restore the address size if the extractor already had it. if (ExtractorAddressSize != 0) TableData.setAddressSize(ExtractorAddressSize); @@ -981,13 +991,7 @@ Error DWARFDebugLine::LineTable::parse( case DW_LNS_copy: // Takes no arguments. Append a row to the matrix using the // current values of the state-machine registers. - if (Verbose) { - *OS << "\n"; - OS->indent(12); - } - if (OS) - State.Row.dump(*OS); - State.appendRowToMatrix(); + EmitRow(); break; case DW_LNS_advance_pc: @@ -1152,15 +1156,9 @@ Error DWARFDebugLine::LineTable::parse( ParsingState::AddrAndLineDelta Delta = State.handleSpecialOpcode(Opcode, OpcodeOffset); - if (Verbose) { - *OS << "address += " << Delta.Address << ", line += " << Delta.Line - << "\n"; - OS->indent(12); - } - if (OS) - State.Row.dump(*OS); - - State.appendRowToMatrix(); + if (Verbose) + *OS << "address += " << Delta.Address << ", line += " << Delta.Line; + EmitRow(); *OffsetPtr = Cursor.tell(); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index 1a1857d8cd795..dc7da5d9348fc 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -70,6 +70,9 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const { DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges( llvm::Optional BaseAddr) const { DWARFAddressRangesVector Res; + // debug_addr can't use the max integer tombstone because that's used for the + // base address specifier entry - so use max-1. + uint64_t Tombstone = dwarf::computeTombstoneAddress(AddressSize) - 1; for (const RangeListEntry &RLE : Entries) { if (RLE.isBaseAddressSelectionEntry(AddressSize)) { BaseAddr = {RLE.EndAddress, RLE.SectionIndex}; @@ -78,12 +81,16 @@ DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges( DWARFAddressRange E; E.LowPC = RLE.StartAddress; + if (E.LowPC == Tombstone) + continue; E.HighPC = RLE.EndAddress; E.SectionIndex = RLE.SectionIndex; // Base address of a range list entry is determined by the closest preceding // base address selection entry in the same range list. It defaults to the // base address of the compilation unit if there is no such entry. if (BaseAddr) { + if (BaseAddr->Address == Tombstone) + continue; E.LowPC += BaseAddr->Address; E.HighPC += BaseAddr->Address; if (E.SectionIndex == -1ULL) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp index d517e51e7e369..a8e7cdeeafbc1 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp @@ -89,16 +89,17 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t *OffsetPtr) { DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( llvm::Optional BaseAddr, DWARFUnit &U) const { - return getAbsoluteRanges(BaseAddr, [&](uint32_t Index) { - return U.getAddrOffsetSectionItem(Index); - }); + return getAbsoluteRanges( + BaseAddr, U.getAddressByteSize(), + [&](uint32_t Index) { return U.getAddrOffsetSectionItem(Index); }); } DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( - Optional BaseAddr, + Optional BaseAddr, uint8_t AddressByteSize, function_ref(uint32_t)> LookupPooledAddress) const { DWARFAddressRangesVector Res; + uint64_t Tombstone = dwarf::computeTombstoneAddress(AddressByteSize); for (const RangeListEntry &RLE : Entries) { if (RLE.EntryKind == dwarf::DW_RLE_end_of_list) break; @@ -121,8 +122,12 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( switch (RLE.EntryKind) { case dwarf::DW_RLE_offset_pair: E.LowPC = RLE.Value0; + if (E.LowPC == Tombstone) + continue; E.HighPC = RLE.Value1; if (BaseAddr) { + if (BaseAddr->Address == Tombstone) + continue; E.LowPC += BaseAddr->Address; E.HighPC += BaseAddr->Address; } @@ -149,6 +154,8 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( // so we should not run into any here. llvm_unreachable("Unsupported range list encoding"); } + if (E.LowPC == Tombstone) + continue; Res.push_back(E); } return Res; @@ -181,6 +188,8 @@ void RangeListEntry::dump( OS << ": "; } + uint64_t Tombstone = dwarf::computeTombstoneAddress(AddrSize); + switch (EntryKind) { case dwarf::DW_RLE_end_of_list: OS << (DumpOpts.Verbose ? "" : ""); @@ -208,8 +217,11 @@ void RangeListEntry::dump( break; case dwarf::DW_RLE_offset_pair: PrintRawEntry(OS, *this, AddrSize, DumpOpts); - DWARFAddressRange(Value0 + CurrentBase, Value1 + CurrentBase) - .dump(OS, AddrSize, DumpOpts); + if (CurrentBase != Tombstone) + DWARFAddressRange(Value0 + CurrentBase, Value1 + CurrentBase) + .dump(OS, AddrSize, DumpOpts); + else + OS << "dead code"; break; case dwarf::DW_RLE_start_end: DWARFAddressRange(Value0, Value1).dump(OS, AddrSize, DumpOpts); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 31340077a126d..04161e09d3e20 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -268,8 +268,18 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, WithColor(OS, Color) << Name; else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) OS << *FormValue.getAsUnsignedConstant(); - else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose && - FormValue.getAsUnsignedConstant()) { + else if (Attr == DW_AT_low_pc && + (FormValue.getAsAddress() == + dwarf::computeTombstoneAddress(U->getAddressByteSize()))) { + if (DumpOpts.Verbose) { + FormValue.dump(OS, DumpOpts); + OS << " ("; + } + OS << "dead code"; + if (DumpOpts.Verbose) + OS << ')'; + } else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose && + FormValue.getAsUnsignedConstant()) { if (DumpOpts.ShowAddresses) { // Print the actual address rather than the offset. uint64_t LowPC, HighPC, Index; @@ -415,6 +425,9 @@ Optional DWARFDie::getLocBaseAttribute() const { } Optional DWARFDie::getHighPC(uint64_t LowPC) const { + uint64_t Tombstone = dwarf::computeTombstoneAddress(U->getAddressByteSize()); + if (LowPC == Tombstone) + return None; if (auto FormValue = find(DW_AT_high_pc)) { if (auto Address = FormValue->getAsAddress()) { // High PC is an address. diff --git a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s new file mode 100644 index 0000000000000..85a88fc54d894 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s @@ -0,0 +1,545 @@ +# RUN: llvm-mc %s -filetype obj -triple i386-pc-linux -o %t.o +# RUN: not llvm-dwarfdump -v -debug-info -debug-line -debug-addr -debug-rnglists -debug-ranges %t.o | FileCheck --implicit-check-not=DW_TAG --implicit-check-not=DW_AT %s + +# FIXME: Remove the 'not' once the rnglist are lazily/correctly parsed (see comment below) + +# Test that llvm - dwarfdump strips addresses relating to dead code(using the +# DWARFv6 - proposed tombstone constant & nearest equivalent for debug_ranges) +# Testing the tombstone use in debug_info (addr/addrx), debug_ranges, +# debug_rnglists, debug_ranges, and debug_line. + +# CHECK-DAG: .debug_info contents: +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000 +# CHECK-NEXT: [0x00000042, 0x00000048)) +# CHECK: DW_TAG_subprogram +# FIXME: Print address using unit's address size. +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x00000000ffffffff (dead code)) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000042) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_addr_base +# CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x0000000c +# CHECK-NEXT: [0x00000042, 0x00000048) +# CHECK-NEXT: [0x00000042, 0x00000048) +# CHECK-NEXT: [0x00000042, 0x00000048) +# CHECK-NEXT: [0x00000042, 0x00000048) +# CHECK-NEXT: [0x00000042, 0x00000048)) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x00000000ffffffff (dead code)) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000001) address = 0x0000000000000042) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000018 +# CHECK-NEXT: [0x0000000000000042, 0x0000000000000048)) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0xffffffffffffffff (dead code)) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000042) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_addr_base + +# FIXME: Lazily parse rnglists rather than expecting to be able to parse an +# entire rnglists contribution (since there's no way to know where such a +# contribution starts) - rather than assuming one starts at 0. + +# CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000057) +# [0x0000000000000042, 0x0000000000000048) +# [0x0000000000000042, 0x0000000000000048) +# [0x0000000000000042, 0x0000000000000048) +# [0x0000000000000042, 0x0000000000000048) +# [0x0000000000000042, 0x0000000000000048)) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0xffffffffffffffff (dead code)) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000001) address = 0x0000000000000042) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) + +# CHECK-DAG: .debug_line contents: +# CHECK: Address Line +# CHECK-NEXT: -------------- +# FIXME: Dump the address with a size-appropriate encoding +# CHECK-NEXT: DW_LNE_set_address (0x00000000ffffffff) +# CHECK-NEXT: DW_LNS_copy +# CHECK-NEXT: DW_LNS_advance_pc (1) +# CHECK-NEXT: DW_LNE_end_sequence +# CHECK-NEXT: DW_LNE_set_address (0x0000000000000042) +# CHECK-NEXT: DW_LNS_copy +# CHECK-NEXT: 0x0000000000000042 1 +# CHECK-NEXT: DW_LNS_advance_pc (1) +# CHECK-NEXT: DW_LNE_end_sequence +# CHECK: Address Line +# CHECK-NEXT: -------------- +# CHECK-NEXT: DW_LNE_set_address (0xffffffffffffffff) +# CHECK-NEXT: DW_LNS_copy +# CHECK-NEXT: DW_LNS_advance_pc (1) +# CHECK-NEXT: DW_LNE_end_sequence +# CHECK-NEXT: DW_LNE_set_address (0x0000000000000042) +# CHECK-NEXT: DW_LNS_copy +# CHECK-NEXT: 0x0000000000000042 1 +# CHECK-NEXT: DW_LNS_advance_pc (1) +# CHECK-NEXT: DW_LNE_end_sequence + +# Dumping of the debug_addr, ranges, and rnglists sections don't do anything +# different with tombstoned addresses, but dump them just for +# documentation/comparison with the tombstone-filtered renderings in the +# debug_info section above + +# CHECK-DAG: .debug_addr contents: +# CHECK-NEXT: addr_size = 0x04 +# CHECK-NEXT: Addrs: [ +# CHECK-NEXT: 0xffffffff +# CHECK-NEXT: 0x00000042 +# CHECK-NEXT: ] +# CHECK-NEXT: addr_size = 0x08 +# CHECK-NEXT: Addrs: [ +# CHECK-NEXT: 0xffffffffffffffff +# CHECK-NEXT: 0x0000000000000042 +# CHECK-NEXT: ] + +# CHECK-DAG: .debug_ranges contents: +# CHECK-NEXT: fffffffe fffffffe +# CHECK-NEXT: 00000042 00000048 +# CHECK-NEXT: +# FIXME: Would be nice if we didn't assume all the contributions were of the +# same address size, instead dumping them based on the address size of +# the unit that references them. Maybe optimistically guessing at any +# unreferenced chunks. (this would be more like libdwarf/dwarfdump). +# But for now, these 64bit address ranges are mangled/being rendered +# here as though they were a 32 bit address range. +# CHECK-NEXT: fffffffe ffffffff +# CHECK-NEXT: fffffffe ffffffff +# CHECK-NEXT: 00000042 00000000 +# CHECK-NEXT: 00000048 00000000 +# CHECK-NEXT: + +# CHECK-DAG: .debug_rnglists contents: +# CHECK-NEXT: addr_size = 0x04 +# CHECK-NEXT: ranges: +# CHECK-NEXT: [DW_RLE_start_length ]: 0xffffffff, 0x00000006 +# CHECK-NEXT: [DW_RLE_start_length ]: 0x00000042, 0x00000006 +# CHECK-NEXT: [DW_RLE_startx_length]: 0x00000000, 0x00000006 +# CHECK-NEXT: [DW_RLE_startx_length]: 0x00000001, 0x00000006 +# CHECK-NEXT: [DW_RLE_start_end ]: [0xffffffff, 0xffffffff) +# CHECK-NEXT: [DW_RLE_start_end ]: [0x00000042, 0x00000048) +# CHECK-NEXT: [DW_RLE_base_address ]: 0x00000040 +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x00000002, 0x00000008 => [0x00000042, 0x00000048) +# CHECK-NEXT: [DW_RLE_base_address ]: 0xffffffff +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x00000002, 0x00000008 => dead code +# CHECK-NEXT: [DW_RLE_base_addressx]: 0x00000000 +# FIXME: Don't print "computed" values that aren't really computed/instead +# still refer to the index instead of the resulting address +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x00000000, 0x00000006 => [0x00000000, 0x00000006) +# CHECK-NEXT: [DW_RLE_base_addressx]: 0x00000001 +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x00000000, 0x00000006 => [0x00000001, 0x00000007) +# CHECK-NEXT: [DW_RLE_end_of_list ] +# CHECK-NEXT: addr_size = 0x08 +# CHECK-NEXT: ranges: +# CHECK-NEXT: [DW_RLE_start_length ]: 0xffffffffffffffff, 0x0000000000000006 +# CHECK-NEXT: [DW_RLE_start_length ]: 0x0000000000000042, 0x0000000000000006 +# CHECK-NEXT: [DW_RLE_startx_length]: 0x0000000000000000, 0x0000000000000006 +# CHECK-NEXT: [DW_RLE_startx_length]: 0x0000000000000001, 0x0000000000000006 +# CHECK-NEXT: [DW_RLE_start_end ]: [0xffffffffffffffff, 0xffffffffffffffff) +# CHECK-NEXT: [DW_RLE_start_end ]: [0x0000000000000042, 0x0000000000000048) +# CHECK-NEXT: [DW_RLE_base_address ]: 0x0000000000000040 +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x0000000000000002, 0x0000000000000008 => [0x0000000000000042, 0x0000000000000048) +# CHECK-NEXT: [DW_RLE_base_address ]: 0xffffffffffffffff +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x0000000000000002, 0x0000000000000008 => dead code +# CHECK-NEXT: [DW_RLE_base_addressx]: 0x0000000000000000 +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x0000000000000000, 0x0000000000000006 => [0x0000000000000000, 0x0000000000000006) +# CHECK-NEXT: [DW_RLE_base_addressx]: 0x0000000000000001 +# CHECK-NEXT: [DW_RLE_offset_pair ]: 0x0000000000000000, 0x0000000000000006 => [0x0000000000000001, 0x0000000000000007) +# CHECK-NEXT: [DW_RLE_end_of_list ] + + .section .debug_abbrev,"",@progbits +.Ldebug_abbrev4: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) +.Ldebug_abbrev5: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits + .long .Ldebug_info4_end-.Ldebug_info4_begin # Length of Unit +.Ldebug_info4_begin: + .short 4 # DWARF version number + .long .Ldebug_abbrev4 # Offset Into Abbrev. Section + .byte 4 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit + .long .Ldebug_ranges # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram + .long 0xffffffff # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram + .long 0x42 # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 0 # End Of Children Mark +.Ldebug_info4_end: + .long .Ldebug_info5_end-.Ldebug_info5_begin # Length of Unit +.Ldebug_info5_begin: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 4 # Address Size (in bytes) + .long .Ldebug_abbrev5 # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit + .long .Ldebug_addr_base # DW_AT_addr_base + .long .Ldebug_rnglists # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram + .uleb128 0 # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram + .uleb128 1 # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 0 # End Of Children Mark +.Ldebug_info5_end: + .long .Ldebug_info4_64_end-.Ldebug_info4_64_begin # Length of Unit +.Ldebug_info4_64_begin: + .short 4 # DWARF version number + .long .Ldebug_abbrev4 # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit + .long .Ldebug_ranges_64 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram + .quad 0xffffffffffffffff # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram + .quad 0x42 # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 0 # End Of Children Mark +.Ldebug_info4_64_end: + .long .Ldebug_info5_64_end-.Ldebug_info5_64_begin # Length of Unit +.Ldebug_info5_64_begin: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .Ldebug_abbrev5 # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit + .long .Ldebug_addr_64_base # DW_AT_addr_base + .long .Ldebug_rnglists_64 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram + .uleb128 0 # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram + .uleb128 1 # DW_AT_low_pc + .long 0x6 # DW_AT_high_pc + .byte 0 # End Of Children Mark +.Ldebug_info5_64_end: + + .section .debug_ranges,"",@progbits +.Ldebug_ranges: + .long 0xfffffffe + .long 0xfffffffe + .long 0x42 + .long 0x48 + .long 0 + .long 0 +.Ldebug_ranges_64: + .quad 0xfffffffffffffffe + .quad 0xfffffffffffffffe + .quad 0x42 + .quad 0x48 + .quad 0 + .quad 0 + + .section .debug_rnglists,"",@progbits + .long .Ldebug_rnglists_end-.Ldebug_rnglists_begin # Length +.Ldebug_rnglists_begin: + .short 5 # Version + .byte 4 # Address size + .byte 0 # Segment selector size + .long 0 # Offset entry count +.Ldebug_rnglists: + .byte 7 # DW_RLE_start_length + .long 0xffffffff # start address + .uleb128 0x6 # length + .byte 7 # DW_RLE_start_length + .long 0x42 # start address + .uleb128 0x6 # length + .byte 3 # DW_RLE_startx_length + .uleb128 0 # start index + .uleb128 0x6 # length + .byte 3 # DW_RLE_startx_length + .uleb128 1 # start index + .uleb128 0x6 # length + .byte 6 # DW_RLE_start_end + .long 0xffffffff # start address + .long 0xffffffff # end address + .byte 6 # DW_RLE_start_end + .long 0x42 # start address + .long 0x48 # length +# FIXME: RLE_startx_endx unsupported by llvm-dwarfdump +# .byte 2 # DW_RLE_startx_endx +# .uleb128 0 # start address +# .uleb128 0 # length +# .byte 2 # DW_RLE_startx_endx +# .uleb128 1 # start address +# .uleb128 1 # length + .byte 5 # DW_RLE_base_address + .long 0x40 # address + .byte 4 # DW_RLE_offset_pair + .uleb128 2 # start offset + .uleb128 8 # end offset + .byte 5 # DW_RLE_base_address + .long 0xffffffff # address + .byte 4 # DW_RLE_offset_pair + .uleb128 2 # start offset + .uleb128 8 # end offset + .byte 1 # DW_RLE_base_addressx + .uleb128 0 # address + .byte 4 # DW_RLE_offset_pair + .uleb128 0 # start offset + .uleb128 6 # end offset + .byte 1 # DW_RLE_base_addressx + .uleb128 1 # address + .byte 4 # DW_RLE_offset_pair + .uleb128 0 # start offset + .uleb128 6 # end offset + .byte 0 # DW_RLE_end_of_list +.Ldebug_rnglists_end: + .long .Ldebug_rnglists_64_end-.Ldebug_rnglists_64_begin # Length +.Ldebug_rnglists_64_begin: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 0 # Offset entry count +.Ldebug_rnglists_64: + .byte 7 # DW_RLE_start_length + .quad 0xffffffffffffffff # start address + .uleb128 0x6 # length + .byte 7 # DW_RLE_start_length + .quad 0x42 # start address + .uleb128 0x6 # length + .byte 3 # DW_RLE_startx_length + .uleb128 0 # start index + .uleb128 0x6 # length + .byte 3 # DW_RLE_startx_length + .uleb128 1 # start index + .uleb128 0x6 # length + .byte 6 # DW_RLE_start_end + .quad 0xffffffffffffffff # start address + .quad 0xffffffffffffffff # end address + .byte 6 # DW_RLE_start_end + .quad 0x42 # start address + .quad 0x48 # length +# FIXME: RLE_startx_endx unsupported by llvm-dwarfdump +# .byte 2 # DW_RLE_startx_endx +# .uleb128 0 # start address +# .uleb128 0 # length +# .byte 2 # DW_RLE_startx_endx +# .uleb128 1 # start address +# .uleb128 1 # length + .byte 5 # DW_RLE_base_address + .quad 0x40 # address + .byte 4 # DW_RLE_offset_pair + .uleb128 2 # start offset + .uleb128 8 # end offset + .byte 5 # DW_RLE_base_address + .quad 0xffffffffffffffff # address + .byte 4 # DW_RLE_offset_pair + .uleb128 2 # start offset + .uleb128 8 # end offset + .byte 1 # DW_RLE_base_addressx + .uleb128 0 # address + .byte 4 # DW_RLE_offset_pair + .uleb128 0 # start offset + .uleb128 6 # end offset + .byte 1 # DW_RLE_base_addressx + .uleb128 1 # address + .byte 4 # DW_RLE_offset_pair + .uleb128 0 # start offset + .uleb128 6 # end offset + .byte 0 # DW_RLE_end_of_list +.Ldebug_rnglists_64_end: + + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end-.Ldebug_addr_begin # Length of contribution +.Ldebug_addr_begin: + .short 5 # DWARF version number + .byte 4 # Address size + .byte 0 # Segment selector size +.Ldebug_addr_base: + .long 0xffffffff + .long 0x42 +.Ldebug_addr_end: + .long .Ldebug_addr_64_end-.Ldebug_addr_64_begin # Length of contribution +.Ldebug_addr_64_begin: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Ldebug_addr_64_base: + .quad 0xffffffffffffffff + .quad 0x42 +.Ldebug_addr_64_end: + + .section .debug_line,"",@progbits +.Ldebug_line5: + .long .Ldebug_line5_end-.Ldebug_line5_begin # Length of Unit (DWARF-32 format) +.Ldebug_line5_begin: + .short 5 # DWARF version number + .byte 4 # Address Size + .byte 0 # Segment Selector Size + .long .Ldebug_line5_header_end-.Ldebug_line5_header_begin # Length of Prologue +.Ldebug_line5_header_begin: + .byte 1 # Minimum Instruction Length + .byte 1 # Maximum Operations per Instruction + .byte 1 # Default is_stmt + .byte -5 # Line Base + .byte 14 # Line Range + .byte 13 # Opcode Base + .byte 0 # Standard Opcode Lengths + .byte 1 + .byte 1 + .byte 1 + .byte 1 + .byte 0 + .byte 0 + .byte 0 + .byte 1 + .byte 0 + .byte 0 + .byte 1 + # Directory table format + .byte 1 # One element per directory entry + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + # Directory table entries + .byte 1 # Two directory entries + .asciz "dir1" + # File table format + .byte 2 # Four elements per file entry + .byte 2 # DW_LNCT_directory_index + .byte 0x0b # DW_FORM_data1 + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + # File table entries + .byte 1 # Two file entries + .byte 1 + .asciz "file1" +.Ldebug_line5_header_end: + .byte 0 # Extended opcode + .byte 5 # Size 5 + .byte 2 # Opcode: DW_LNE_set_address + .long 0xffffffff # address + .byte 1 # DW_LNS_copy + .byte 2 # DW_LNS_advance_pc + .uleb128 1 # instruction increment + .byte 0 # Extended opcode + .byte 1 # Size 1 + .byte 1 # Opcode: DW_LNE_end_sequence + .byte 0 # Extended opcode + .byte 5 # Size 5 + .byte 2 # Opcode: DW_LNE_set_address + .long 0x42 # address + .byte 1 # DW_LNS_copy + .byte 2 # DW_LNS_advance_pc + .uleb128 1 # instruction increment + .byte 0 # Extended opcode + .byte 1 # Size 1 + .byte 1 # Opcode: DW_LNE_end_sequence +.Ldebug_line5_end: + +.Ldebug_line5_64: + .long .Ldebug_line5_64_end-.Ldebug_line5_64_begin # Length of Unit (DWARF-32 format) +.Ldebug_line5_64_begin: + .short 5 # DWARF version number + .byte 8 # Address Size + .byte 0 # Segment Selector Size + .long .Ldebug_line5_64_header_end-.Ldebug_line5_64_header_begin # Length of Prologue +.Ldebug_line5_64_header_begin: + .byte 1 # Minimum Instruction Length + .byte 1 # Maximum Operations per Instruction + .byte 1 # Default is_stmt + .byte -5 # Line Base + .byte 14 # Line Range + .byte 13 # Opcode Base + .byte 0 # Standard Opcode Lengths + .byte 1 + .byte 1 + .byte 1 + .byte 1 + .byte 0 + .byte 0 + .byte 0 + .byte 1 + .byte 0 + .byte 0 + .byte 1 + # Directory table format + .byte 1 # One element per directory entry + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + # Directory table entries + .byte 1 # Two directory entries + .asciz "dir1" + # File table format + .byte 2 # Four elements per file entry + .byte 2 # DW_LNCT_directory_index + .byte 0x0b # DW_FORM_data1 + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + # File table entries + .byte 1 # Two file entries + .byte 1 + .asciz "file1" +.Ldebug_line5_64_header_end: + .byte 0 # Extended opcode + .byte 9 # Size 9 + .byte 2 # Opcode: DW_LNE_set_address + .quad 0xffffffffffffffff # address + .byte 1 # DW_LNS_copy + .byte 2 # DW_LNS_advance_pc + .uleb128 1 # instruction increment + .byte 0 # Extended opcode + .byte 1 # Size 1 + .byte 1 # Opcode: DW_LNE_end_sequence + .byte 0 # Extended opcode + .byte 9 # Size 9 + .byte 2 # Opcode: DW_LNE_set_address + .quad 0x42 # address + .byte 1 # DW_LNS_copy + .byte 2 # DW_LNS_advance_pc + .uleb128 1 # instruction increment + .byte 0 # Extended opcode + .byte 1 # Size 1 + .byte 1 # Opcode: DW_LNE_end_sequence +.Ldebug_line5_64_end: + From 80ac6da98e8fb88ed68308acc8e8689420a54f90 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 4 Oct 2020 22:06:57 +0300 Subject: [PATCH 514/544] [NFC][SCEV] Add a test with some patterns where we could treat inttoptr/ptrtoint as semi-transparent --- ...emi-transparent-inttoptr-ptrtoint-casts.ll | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 llvm/test/Analysis/ScalarEvolution/semi-transparent-inttoptr-ptrtoint-casts.ll diff --git a/llvm/test/Analysis/ScalarEvolution/semi-transparent-inttoptr-ptrtoint-casts.ll b/llvm/test/Analysis/ScalarEvolution/semi-transparent-inttoptr-ptrtoint-casts.ll new file mode 100644 index 0000000000000..d9fdfa03d7a77 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/semi-transparent-inttoptr-ptrtoint-casts.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s +; RUN: opt < %s -S -disable-output "-passes=print" 2>&1 | FileCheck %s + +; While we can't treat inttoptr/ptrtoint casts as fully transparent, +; instead of modelling them as fully opaque (unknown), we can at least model +; their source values are opaque (unknown). Which, given e.g.: +; %x = ??? +; %y = inttoptr %x +; %z = inttoptr %x +; at least allows us to tell that %y and %z are identical. +; Note that we must stop at that, we can not further analyze %x itself. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define dso_local void @ptrtoint(i8* %in, i64* %out0, i64* %out1, i32* %out2, i128* %out3) { +; CHECK-LABEL: 'ptrtoint' +; CHECK-NEXT: Classifying expressions for: @ptrtoint +; CHECK-NEXT: %in_adj = getelementptr inbounds i8, i8* %in, i64 42 +; CHECK-NEXT: --> (42 + %in) U: [-9223372036854775766,-9223372036854775808) S: [-9223372036854775766,-9223372036854775808) +; CHECK-NEXT: %p0 = ptrtoint i8* %in_adj to i64 +; CHECK-NEXT: --> %p0 U: full-set S: full-set +; CHECK-NEXT: %p1 = ptrtoint i8* %in_adj to i64 +; CHECK-NEXT: --> %p1 U: full-set S: full-set +; CHECK-NEXT: %p2 = ptrtoint i8* %in_adj to i32 +; CHECK-NEXT: --> %p2 U: full-set S: full-set +; CHECK-NEXT: %p3 = ptrtoint i8* %in_adj to i128 +; CHECK-NEXT: --> %p3 U: [0,18446744073709551616) S: [-18446744073709551616,18446744073709551616) +; CHECK-NEXT: Determining loop execution counts for: @ptrtoint +; + %in_adj = getelementptr inbounds i8, i8* %in, i64 42 + %p0 = ptrtoint i8* %in_adj to i64 + %p1 = ptrtoint i8* %in_adj to i64 + %p2 = ptrtoint i8* %in_adj to i32 + %p3 = ptrtoint i8* %in_adj to i128 + store i64 %p0, i64* %out0 + store i64 %p1, i64* %out1 + store i32 %p2, i32* %out2 + store i128 %p3, i128* %out3 + ret void +} + +define dso_local void @inttoptr(i64 %in0, i8** %out0, i8** %out1) { +; CHECK-LABEL: 'inttoptr' +; CHECK-NEXT: Classifying expressions for: @inttoptr +; CHECK-NEXT: %in_adj = add i64 %in0, 42 +; CHECK-NEXT: --> (42 + %in0) U: full-set S: full-set +; CHECK-NEXT: %i0 = inttoptr i64 %in_adj to i8* +; CHECK-NEXT: --> %i0 U: full-set S: full-set +; CHECK-NEXT: %i1 = inttoptr i64 %in_adj to i8* +; CHECK-NEXT: --> %i1 U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @inttoptr +; + %in_adj = add i64 %in0, 42 + %i0 = inttoptr i64 %in_adj to i8* + %i1 = inttoptr i64 %in_adj to i8* + store i8* %i0, i8** %out0 + store i8* %i1, i8** %out1 + ret void +} +define dso_local void @inttoptr_widening(i32 %in1, i8** %out2) { +; CHECK-LABEL: 'inttoptr_widening' +; CHECK-NEXT: Classifying expressions for: @inttoptr_widening +; CHECK-NEXT: %in_adj = add i32 %in1, 42 +; CHECK-NEXT: --> (42 + %in1) U: full-set S: full-set +; CHECK-NEXT: %i0 = inttoptr i32 %in_adj to i8* +; CHECK-NEXT: --> %i0 U: [0,4294967296) S: [-4294967296,4294967296) +; CHECK-NEXT: Determining loop execution counts for: @inttoptr_widening +; + %in_adj = add i32 %in1, 42 + %i0 = inttoptr i32 %in_adj to i8* + store i8* %i0, i8** %out2 + ret void +} +define dso_local void @inttoptr_narrowing(i128 %in2, i8** %out3) { +; CHECK-LABEL: 'inttoptr_narrowing' +; CHECK-NEXT: Classifying expressions for: @inttoptr_narrowing +; CHECK-NEXT: %in_adj = add i128 %in2, 42 +; CHECK-NEXT: --> (42 + %in2) U: full-set S: full-set +; CHECK-NEXT: %i0 = inttoptr i128 %in_adj to i8* +; CHECK-NEXT: --> %i0 U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @inttoptr_narrowing +; + %in_adj = add i128 %in2, 42 + %i0 = inttoptr i128 %in_adj to i8* + store i8* %i0, i8** %out3 + ret void +} + +; Note that we never try to analyze the value of the ptrtoint/inttoptr! +define i8* @onlysemitransparency(i8* %in) { +; CHECK-LABEL: 'onlysemitransparency' +; CHECK-NEXT: Classifying expressions for: @onlysemitransparency +; CHECK-NEXT: %i0 = ptrtoint i8* %in to i64 +; CHECK-NEXT: --> %i0 U: full-set S: full-set +; CHECK-NEXT: %i1 = inttoptr i64 %i0 to i8* +; CHECK-NEXT: --> %i1 U: full-set S: full-set +; CHECK-NEXT: %i2 = ptrtoint i8* %i1 to i64 +; CHECK-NEXT: --> %i2 U: full-set S: full-set +; CHECK-NEXT: %i3 = inttoptr i64 %i2 to i8* +; CHECK-NEXT: --> %i3 U: full-set S: full-set +; CHECK-NEXT: %i4 = ptrtoint i8* %i3 to i64 +; CHECK-NEXT: --> %i4 U: full-set S: full-set +; CHECK-NEXT: %i5 = inttoptr i64 %i4 to i8* +; CHECK-NEXT: --> %i5 U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @onlysemitransparency +; + %i0 = ptrtoint i8* %in to i64 + %i1 = inttoptr i64 %i0 to i8* + %i2 = ptrtoint i8* %i1 to i64 + %i3 = inttoptr i64 %i2 to i8* + %i4 = ptrtoint i8* %i3 to i64 + %i5 = inttoptr i64 %i4 to i8* + ret i8* %i5 +} From 37010d4ddf477d3cc60792a92918af5f2f6e42c3 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 14 Sep 2020 11:22:17 -0700 Subject: [PATCH 515/544] [Coroutines][NewPM] Fix coroutine tests under new pass manager Some new function parameter attributes are derived under NPM. Reviewed By: rjmccall Differential Revision: https://reviews.llvm.org/D88760 --- llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll | 9 +++++---- .../test/Transforms/Coroutines/coro-retcon-once-value.ll | 6 ++++-- .../Transforms/Coroutines/coro-retcon-resume-values.ll | 3 ++- llvm/test/Transforms/Coroutines/coro-retcon-value.ll | 3 ++- llvm/test/Transforms/Coroutines/coro-retcon.ll | 3 ++- llvm/test/Transforms/Coroutines/coro-swifterror.ll | 4 ++-- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll index 61c21324d94a2..19cc873fb3297 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck %s target datalayout = "p:64:64:64" @@ -39,7 +40,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1) +; CHECK-LABEL: define internal { i8*, i8*, i32 } @f.resume.0(i8* {{.*}} %0, i1 %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T1:%.*]] = bitcast i8* %0 to i8** ; CHECK-NEXT: [[ALLOC:%.*]] = load i8*, i8** [[T1]], align 8 @@ -82,7 +83,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1) +; CHECK-LABEL: define internal { i8*, i32 } @g.resume.0(i8* {{.*}} %0, i1 %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : @@ -131,7 +132,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0, i1 %1) +; CHECK-LABEL: define internal { i8*, i32 } @h.resume.0(i8* {{.*}} %0, i1 %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : @@ -179,7 +180,7 @@ loop2: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* noalias nonnull align 8 dereferenceable(1024) %0) +; CHECK-LABEL: define internal { i8*, i32 } @i.resume.0(i8* {{.*}} %0) ; CHECK-NEXT: : ; CHECK-NEXT: [[NSLOT:%.*]] = bitcast i8* %0 to i32* ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[NSLOT]], align 8 diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll index 6e80da87bc09f..573eb8231de79 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck %s + target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.12.0" @@ -45,7 +47,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[T1]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1) +; CHECK-LABEL: define internal void @f.resume.0(i8* {{.*}} %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : @@ -57,7 +59,7 @@ cleanup: ; CHECK-NEXT: ret void ; CHECK-NEXT: } -; CHECK-LABEL: define internal void @f.resume.1(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1) +; CHECK-LABEL: define internal void @f.resume.1(i8* {{.*}} %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll index 80e8170d7ba1e..52e6f076f0375 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck %s define i8* @f(i8* %buffer, i32 %n) { entry: @@ -30,7 +31,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32, i1)* @f.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1, i1 zeroext %2) +; CHECK-LABEL: define internal i8* @f.resume.0(i8* {{.*}} %0, i32 %1, i1 zeroext %2) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to i32* ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 4 diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-value.ll b/llvm/test/Transforms/Coroutines/coro-retcon-value.ll index 29ec7cda170f2..2c66b6af99c69 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-value.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-value.ll @@ -1,5 +1,6 @@ ; First example from Doc/Coroutines.rst (two block loop) converted to retcon ; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck %s define {i8*, i32} @f(i8* %buffer, i32 %n) { entry: @@ -30,7 +31,7 @@ cleanup: ; CHECK-NEXT: ret { i8*, i32 } [[RET]] ; CHECK-NEXT: } -; CHECK-LABEL: define internal { i8*, i32 } @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i8 zeroext %1) +; CHECK-LABEL: define internal { i8*, i32 } @f.resume.0(i8* {{.*}} %0, i8 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[T0:%.*]] = icmp eq i8 %1, 0 ; CHECK-NEXT: br i1 [[T0]], diff --git a/llvm/test/Transforms/Coroutines/coro-retcon.ll b/llvm/test/Transforms/Coroutines/coro-retcon.ll index 13283f05b2661..0e109e24bc3e3 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -1,5 +1,6 @@ ; First example from Doc/Coroutines.rst (two block loop) converted to retcon ; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s +; RUN: opt < %s -enable-coroutines -aa-pipeline=basic-aa -passes='default' -S | FileCheck %s define i8* @f(i8* %buffer, i32 %n) { entry: @@ -30,7 +31,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1) +; CHECK-LABEL: define internal i8* @f.resume.0(i8* {{.*}} %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : diff --git a/llvm/test/Transforms/Coroutines/coro-swifterror.ll b/llvm/test/Transforms/Coroutines/coro-swifterror.ll index 7390bb77ca9b6..b725b24a7a4cd 100644 --- a/llvm/test/Transforms/Coroutines/coro-swifterror.ll +++ b/llvm/test/Transforms/Coroutines/coro-swifterror.ll @@ -40,7 +40,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1, i8**)* @f.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1, i8** swifterror %2) +; CHECK-LABEL: define internal i8* @f.resume.0(i8* {{.*}} %0, i1 zeroext %1, i8** swifterror %2) ; CHECK-NEXT: : ; CHECK-NEXT: br i1 %1, ; CHECK: : @@ -102,7 +102,7 @@ cleanup: ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1)* @g.resume.0 to i8*) ; CHECK-NEXT: } -; CHECK-LABEL: define internal i8* @g.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i1 zeroext %1) +; CHECK-LABEL: define internal i8* @g.resume.0(i8* {{.*}} %0, i1 zeroext %1) ; CHECK-NEXT: : ; CHECK-NEXT: [[ERRORSLOT:%.*]] = alloca swifterror i8*, align 4 ; CHECK-NEXT: br i1 %1, From b0dce6b37f15f487064223f7e3e6a5701a9d7bff Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Sun, 4 Oct 2020 15:17:34 -0700 Subject: [PATCH 516/544] Revert "[RFC] Factor out repetitive cmake patterns for llvm-style projects" This reverts commit e9b87f43bde8b5f0d8a79c5884fdce639b12e0ca. There are issues with macros generating macros without an obvious simple fix so I'm going to revert this and try something different. --- llvm/cmake/modules/LLVMProjectOptions.cmake | 68 ----------- llvm/cmake/modules/LLVMProjectTargets.cmake | 109 ------------------ mlir/CMakeLists.txt | 31 +++-- mlir/cmake/modules/AddMLIR.cmake | 9 +- mlir/examples/standalone/CMakeLists.txt | 9 -- .../standalone/standalone-opt/CMakeLists.txt | 2 +- .../standalone-translate/CMakeLists.txt | 2 +- mlir/examples/toy/CMakeLists.txt | 2 +- mlir/test/Examples/standalone/test.toy | 1 - mlir/tools/mlir-cpu-runner/CMakeLists.txt | 2 +- mlir/tools/mlir-cuda-runner/CMakeLists.txt | 2 +- mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt | 2 +- mlir/tools/mlir-opt/CMakeLists.txt | 2 +- mlir/tools/mlir-reduce/CMakeLists.txt | 2 +- mlir/tools/mlir-rocm-runner/CMakeLists.txt | 2 +- mlir/tools/mlir-translate/CMakeLists.txt | 2 +- mlir/tools/mlir-vulkan-runner/CMakeLists.txt | 2 +- 17 files changed, 28 insertions(+), 221 deletions(-) delete mode 100644 llvm/cmake/modules/LLVMProjectOptions.cmake delete mode 100644 llvm/cmake/modules/LLVMProjectTargets.cmake diff --git a/llvm/cmake/modules/LLVMProjectOptions.cmake b/llvm/cmake/modules/LLVMProjectOptions.cmake deleted file mode 100644 index ce466953280ed..0000000000000 --- a/llvm/cmake/modules/LLVMProjectOptions.cmake +++ /dev/null @@ -1,68 +0,0 @@ -# LLVM-style projects generally have the same directory structure. This file -# provides some bolierplate cmake support for projects that supports this -# directory structure. Note that generally speaking, projects should prefer -# to use their own rules for these rather than relying on the core llvm build -# targets. - -# Generally name should be lower case. -function(add_llvm_project_options name) - string(TOUPPER "${name}" uppername) - - # Define options to control the inclusion and default build behavior for - # components which may not strictly be necessary (tools, examples, and tests). - # - # This is primarily to support building smaller or faster project files. - option(${uppername}_INCLUDE_TOOLS - "Generate build targets for the ${uppername} tools." - ${LLVM_INCLUDE_TOOLS}) - option(${uppername}_BUILD_TOOLS - "Build the ${uppername} tools. If OFF, just generate build targets." - ${LLVM_BUILD_TOOLS}) - - option(${uppername}_INCLUDE_UTILS - "Generate build targets for the ${uppername} utils." - ${LLVM_INCLUDE_UTILS}) - option(${uppername}_BUILD_UTILS - "Build ${uppername} utility binaries. If OFF, just generate build targets." - ${LLVM_BUILD_UTILS}) - option(${uppername}_INSTALL_UTILS - "Include utility binaries in the 'install' target." - ${LLVM_INSTALL_UTILS}) - - # i.e. Don't install headers, for instance. - option(${uppername}_INSTALL_TOOLCHAIN_ONLY - "Only include toolchain files in the 'install' target." - ${LLVM_INSTALL_TOOLCHAIN_ONLY}) - - option(${uppername}_BUILD_EXAMPLES - "Build the ${uppername} example programs. If OFF, just generate build targets." - ${LLVM_BUILD_EXAMPLES}) - option(${uppername}_INCLUDE_EXAMPLES - "Generate build targets for the ${uppername} examples" - ${LLVM_INCLUDE_EXAMPLES}) - if(${uppername}_BUILD_EXAMPLES) - add_definitions(-DBUILD_EXAMPLES) - endif(${uppername}_BUILD_EXAMPLES) - - option(${uppername}_BUILD_TESTS - "Build ${uppername} unit tests. If OFF, just generate build targets." - ${LLVM_BUILD_TESTS}) - option(${uppername}_INCLUDE_TESTS - "Generate build targets for the ${uppername} unit tests." - ${LLVM_INCLUDE_TESTS}) - if (${uppername}_INCLUDE_TESTS) - add_definitions(-D${uppername}_INCLUDE_TESTS) - endif() - - option(${uppername}_INCLUDE_INTEGRATION_TESTS - "Generate build targets for the ${uppername} integration tests." - ${LLVM_INCLUDE_INTEGRATION_TESTS}) - if (${uppername}_INCLUDE_INTEGRATION_TESTS) - add_definitions(-D${uppername}_INCLUDE_INTEGRATION_TESTS) - endif() - - option(${uppername}_INCLUDE_DOCS - "Generate build targets for the ${uppername} docs." - ${LLVM_INCLUDE_DOCS}) - -endfunction(add_llvm_project_options) diff --git a/llvm/cmake/modules/LLVMProjectTargets.cmake b/llvm/cmake/modules/LLVMProjectTargets.cmake deleted file mode 100644 index 4e73706d14777..0000000000000 --- a/llvm/cmake/modules/LLVMProjectTargets.cmake +++ /dev/null @@ -1,109 +0,0 @@ -# For project foo, this function generates: -# add_foo_tool(name) (An executable installed by default) -# add_foo_utility(name) (An executable *not* installed by default) -# add_foo_example(name) (An executable which is built, but never installed) -# add_foo_example_library(name) (A library to go along with an example) - -# It also assumes the following configuration environment variables -# (see LLVMProjectOptions.cmake) -# FOO_TOOLS_INSTALL_DIR -# FOO_BUILD_TOOLS -# FOO_BUILD_UTILS -# FOO_INSTALL_UTILS -# FOO_BUILD_EXAMPLES -# FOO_HAS_EXPORTS -# FOO_INSTALL_TOOLCHAIN_ONLY - -function(add_llvm_project_targets projectname) - string(TOUPPER "${name}" upperprojectname) - - macro(add_${projectname}_tool name) - if( NOT ${upperprojectname}_BUILD_TOOLS ) - set(EXCLUDE_FROM_ALL ON) - endif() - add_llvm_executable(${name} ${ARGN}) - - if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT ${upperprojectname}_INSTALL_TOOLCHAIN_ONLY) - if( ${upperprojectname}_BUILD_TOOLS ) - set(export_to_${projectname}exports) - if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR - NOT LLVM_DISTRIBUTION_COMPONENTS) - set(export_to_${projectname}exports EXPORT ${upperprojectname}Exports) - set_property(GLOBAL PROPERTY ${upperprojectname}_HAS_EXPORTS True) - endif() - - install(TARGETS ${name} - ${export_to_${projectname}exports} - RUNTIME DESTINATION ${${upperprojectname}_TOOLS_INSTALL_DIR} - COMPONENT ${name}) - - if (NOT LLVM_ENABLE_IDE) - add_llvm_install_targets(install-${name} - DEPENDS ${name} - COMPONENT ${name}) - endif() - endif() - endif() - if( ${upperprojectname}_BUILD_TOOLS ) - set_property(GLOBAL APPEND PROPERTY ${upperprojectname}_EXPORTS ${name}) - endif() - set_target_properties(${name} PROPERTIES FOLDER "Tools") - endmacro(add_${projectname}_tool name) - - macro(add_${projectname}_example name) - if( NOT ${upperprojectname}_BUILD_EXAMPLES ) - set(EXCLUDE_FROM_ALL ON) - endif() - add_llvm_executable(${name} ${ARGN}) - if( ${upperprojectname}_BUILD_EXAMPLES ) - install(TARGETS ${name} RUNTIME DESTINATION examples) - endif() - set_target_properties(${name} PROPERTIES FOLDER "Examples") - endmacro(add_${projectname}_example name) - - macro(add_${projectname}_example_library name) - if( NOT ${upperprojectname}_BUILD_EXAMPLES ) - set(EXCLUDE_FROM_ALL ON) - add_llvm_library(${name} BUILDTREE_ONLY ${ARGN}) - else() - add_llvm_library(${name} ${ARGN}) - endif() - - set_target_properties(${name} PROPERTIES FOLDER "Examples") - endmacro(add_${projectname}_example_library name) - - # This is a macro that is used to create targets for executables that are needed - # for development, but that are not intended to be installed by default. - macro(add_${projectname}_utility name) - if ( NOT ${upperprojectname}_BUILD_UTILS ) - set(EXCLUDE_FROM_ALL ON) - endif() - - add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) - set_target_properties(${name} PROPERTIES FOLDER "Utils") - if (NOT ${upperprojectname}_INSTALL_TOOLCHAIN_ONLY) - if (${upperprojectname}_INSTALL_UTILS AND ${upperprojectname}_BUILD_UTILS) - set(export_to_${projectname}exports) - if (${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR - NOT LLVM_DISTRIBUTION_COMPONENTS) - set(export_to_${projectname}exports EXPORT ${upperprojectname}Exports) - set_property(GLOBAL PROPERTY ${upperprojectname}_HAS_EXPORTS True) - endif() - - install(TARGETS ${name} - ${export_to_${projectname}exports} - RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} - COMPONENT ${name}) - - if (NOT LLVM_ENABLE_IDE) - add_llvm_install_targets(install-${name} - DEPENDS ${name} - COMPONENT ${name}) - endif() - set_property(GLOBAL APPEND PROPERTY ${upperprojectname}_EXPORTS ${name}) - elseif(${upperprojectname}_BUILD_UTILS) - set_property(GLOBAL APPEND PROPERTY ${upperprojectname}_EXPORTS_BUILDTREE_ONLY ${name}) - endif() - endif() - endmacro(add_${projectname}_utility name) -endfunction(add_llvm_project_targets) diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index ffba3bea224ee..50511fd2aef96 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -21,10 +21,6 @@ set_target_properties(mlir-headers PROPERTIES FOLDER "Misc") add_dependencies(mlir-headers mlir-generic-headers) add_custom_target(mlir-doc) -# Get a bunch of LLVM-style default options. -include(LLVMProjectOptions) -add_llvm_project_options(mlir) - # Build the CUDA conversions and run according tests if the NVPTX backend # is available if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) @@ -48,6 +44,13 @@ set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner" set(MLIR_ROCM_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir ROCm runner") set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner") +option(MLIR_INCLUDE_TESTS + "Generate build targets for the MLIR unit tests." + ${LLVM_INCLUDE_TESTS}) + +option(MLIR_INCLUDE_INTEGRATION_TESTS + "Generate build targets for the MLIR integration tests.") + #------------------------------------------------------------------------------- # Python Bindings Configuration # Requires: @@ -80,46 +83,42 @@ if(MLIR_BINDINGS_PYTHON_ENABLED) "extension = '${PYTHON_MODULE_EXTENSION}") endif() -# Get a bunch of default targets -include(LLVMProjectTargets) -add_llvm_project_targets(mlir) - include_directories( "include") include_directories( ${MLIR_INCLUDE_DIR}) # Adding tools/mlir-tblgen here as calling add_tablegen sets some variables like # MLIR_TABLEGEN_EXE in PARENT_SCOPE which gets lost if that folder is included # from another directory like tools -if (MLIR_INCLUDE_TOOLS) - add_subdirectory(tools/mlir-tblgen) -endif() +add_subdirectory(tools/mlir-tblgen) add_subdirectory(include/mlir) add_subdirectory(lib) # C API needs all dialects for registration, but should be built before tests. add_subdirectory(lib/CAPI) if (MLIR_INCLUDE_TESTS) + add_definitions(-DMLIR_INCLUDE_TESTS) add_subdirectory(unittests) add_subdirectory(test) endif() if (MLIR_INCLUDE_INTEGRATION_TESTS) + add_definitions(-DMLIR_INCLUDE_INTEGRATION_TESTS) add_subdirectory(integration_test) endif() # Tools needs to come late to ensure that MLIR_ALL_LIBS is populated. # Generally things after this point may depend on MLIR_ALL_LIBS or libMLIR.so. -if (MLIR_INCLUDE_TOOLS) - add_subdirectory(tools) -endif() +add_subdirectory(tools) -if (MLIR_INCLUDE_EXAMPLES) +if( LLVM_INCLUDE_EXAMPLES ) add_subdirectory(examples) endif() +option(MLIR_INCLUDE_DOCS "Generate build targets for the MLIR docs." + ${LLVM_INCLUDE_DOCS}) if (MLIR_INCLUDE_DOCS) add_subdirectory(docs) endif() -if (NOT MLIR_INSTALL_TOOLCHAIN_ONLY) +if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/mlir include/mlir-c DESTINATION include COMPONENT mlir-headers diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake index 56742db33ee19..8394c056c1db5 100644 --- a/mlir/cmake/modules/AddMLIR.cmake +++ b/mlir/cmake/modules/AddMLIR.cmake @@ -24,12 +24,7 @@ function(add_mlir_interface interface) endfunction() -# Generate Documentation using the mlir-doc rule -# doc_filename: the basename of a .td tablegen file -# command: the tablegen command to run, typically "-gen-op-doc", -# "-gen-pass-doc", or "-gen-dialect-doc" -# output_file: the basename of a .md markdown file to be output -# output_directory: the directory to place the output +# Generate Documentation function(add_mlir_doc doc_filename command output_file output_directory) set(LLVM_TARGET_DEFINITIONS ${doc_filename}.td) tablegen(MLIR ${output_file}.md ${command} "-I${MLIR_MAIN_INCLUDE_DIR}" "-I${MLIR_INCLUDE_DIR}") @@ -45,7 +40,7 @@ function(add_mlir_doc doc_filename command output_file output_directory) endfunction() # Declare an mlir library which can be compiled in libMLIR.so -# In addition to everything that llvm_add_library accepts, this +# In addition to everything that llvm_add_librar accepts, this # also has the following option: # EXCLUDE_FROM_LIBMLIR # Don't include this library in libMLIR.so. This option should be used diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt index 721efae0388b0..45dc80804aa9a 100644 --- a/mlir/examples/standalone/CMakeLists.txt +++ b/mlir/examples/standalone/CMakeLists.txt @@ -31,17 +31,8 @@ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") include(TableGen) include(AddLLVM) include(AddMLIR) - -# Get a bunch of LLVM-style default options. -include(LLVMProjectOptions) -add_llvm_project_options(standalone) - include(HandleLLVMOptions) -# Get a bunch of default targets -include(LLVMProjectTargets) -add_llvm_project_targets(standalone) - include_directories(${LLVM_INCLUDE_DIRS}) include_directories(${MLIR_INCLUDE_DIRS}) include_directories(${PROJECT_SOURCE_DIR}/include) diff --git a/mlir/examples/standalone/standalone-opt/CMakeLists.txt b/mlir/examples/standalone/standalone-opt/CMakeLists.txt index e4b12e01228a4..06bbb4712645a 100644 --- a/mlir/examples/standalone/standalone-opt/CMakeLists.txt +++ b/mlir/examples/standalone/standalone-opt/CMakeLists.txt @@ -6,7 +6,7 @@ set(LIBS MLIROptLib MLIRStandalone ) -add_standalone_tool(standalone-opt standalone-opt.cpp) +add_llvm_executable(standalone-opt standalone-opt.cpp) llvm_update_compile_flags(standalone-opt) target_link_libraries(standalone-opt PRIVATE ${LIBS}) diff --git a/mlir/examples/standalone/standalone-translate/CMakeLists.txt b/mlir/examples/standalone/standalone-translate/CMakeLists.txt index 15aa237fd18e2..137f7947cfac7 100644 --- a/mlir/examples/standalone/standalone-translate/CMakeLists.txt +++ b/mlir/examples/standalone/standalone-translate/CMakeLists.txt @@ -5,7 +5,7 @@ set(LLVM_LINK_COMPONENTS get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) -add_standalone_tool(standalone-translate +add_llvm_executable(standalone-translate standalone-translate.cpp ) llvm_update_compile_flags(standalone-translate) diff --git a/mlir/examples/toy/CMakeLists.txt b/mlir/examples/toy/CMakeLists.txt index 39f6bd09a75c7..56002b1ad2e27 100644 --- a/mlir/examples/toy/CMakeLists.txt +++ b/mlir/examples/toy/CMakeLists.txt @@ -3,7 +3,7 @@ set_target_properties(Toy PROPERTIES FOLDER Examples) macro(add_toy_chapter name) add_dependencies(Toy ${name}) - add_mlir_example(${name} ${ARGN}) + add_llvm_example(${name} ${ARGN}) endmacro(add_toy_chapter name) add_subdirectory(Ch1) diff --git a/mlir/test/Examples/standalone/test.toy b/mlir/test/Examples/standalone/test.toy index cd183c9f2fd0e..7b4a9c23906e3 100644 --- a/mlir/test/Examples/standalone/test.toy +++ b/mlir/test/Examples/standalone/test.toy @@ -1,5 +1,4 @@ # RUN: %cmake %mlir_src_root/examples/standalone -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc -DMLIR_DIR=%llvm_lib_dir/cmake/mlir ; %cmake --build . --target check-standalone | tee %t | FileCheck %s -# RUN: %cmake --build . --target mlir-doc # CHECK: Passed: 3 # UNSUPPORTED: windows, android diff --git a/mlir/tools/mlir-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-cpu-runner/CMakeLists.txt index 7cd81128758d7..596012c882280 100644 --- a/mlir/tools/mlir-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cpu-runner/CMakeLists.txt @@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS nativecodegen ) -add_mlir_tool(mlir-cpu-runner +add_llvm_tool(mlir-cpu-runner mlir-cpu-runner.cpp ) llvm_update_compile_flags(mlir-cpu-runner) diff --git a/mlir/tools/mlir-cuda-runner/CMakeLists.txt b/mlir/tools/mlir-cuda-runner/CMakeLists.txt index 16daca88bc98f..5488262d7ee7e 100644 --- a/mlir/tools/mlir-cuda-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cuda-runner/CMakeLists.txt @@ -68,7 +68,7 @@ if(MLIR_CUDA_RUNNER_ENABLED) LIST(APPEND targets_to_link "LLVM${t}") ENDFOREACH(t) - add_mlir_tool(mlir-cuda-runner + add_llvm_tool(mlir-cuda-runner mlir-cuda-runner.cpp DEPENDS diff --git a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt index c27857b3b7ca6..bc9a0c1f310a1 100644 --- a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt +++ b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt @@ -2,7 +2,7 @@ set(LLVM_LINK_COMPONENTS Core Support ) -add_mlir_tool(mlir-linalg-ods-gen +add_llvm_tool(mlir-linalg-ods-gen mlir-linalg-ods-gen.cpp ) llvm_update_compile_flags(mlir-linalg-ods-gen) diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index 65a328fa141e9..483dcfec0c0ff 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -50,7 +50,7 @@ add_mlir_library(MLIRMlirOptMain ${LIBS} ) -add_mlir_tool(mlir-opt +add_llvm_tool(mlir-opt mlir-opt.cpp DEPENDS diff --git a/mlir/tools/mlir-reduce/CMakeLists.txt b/mlir/tools/mlir-reduce/CMakeLists.txt index 8e4a42f5882bd..958c2c94cc684 100644 --- a/mlir/tools/mlir-reduce/CMakeLists.txt +++ b/mlir/tools/mlir-reduce/CMakeLists.txt @@ -43,7 +43,7 @@ set(LIBS MLIRTransformUtils ) -add_mlir_tool(mlir-reduce +add_llvm_tool(mlir-reduce OptReductionPass.cpp Passes/OpReducer.cpp ReductionNode.cpp diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt index 3c90beac0b57e..2c0791d7a5c1d 100644 --- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt +++ b/mlir/tools/mlir-rocm-runner/CMakeLists.txt @@ -104,7 +104,7 @@ if(MLIR_ROCM_RUNNER_ENABLED) LIST(APPEND targets_to_link "LLVM${t}") ENDFOREACH(t) - add_mlir_tool(mlir-rocm-runner + add_llvm_tool(mlir-rocm-runner mlir-rocm-runner.cpp DEPENDS diff --git a/mlir/tools/mlir-translate/CMakeLists.txt b/mlir/tools/mlir-translate/CMakeLists.txt index cc7ff64da42e7..99b98f9288b92 100644 --- a/mlir/tools/mlir-translate/CMakeLists.txt +++ b/mlir/tools/mlir-translate/CMakeLists.txt @@ -5,7 +5,7 @@ set(LLVM_LINK_COMPONENTS get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) -add_mlir_tool(mlir-translate +add_llvm_tool(mlir-translate mlir-translate.cpp ) llvm_update_compile_flags(mlir-translate) diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt index c11b4ef7c9f26..c7a03259bb839 100644 --- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt +++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt @@ -85,7 +85,7 @@ if (MLIR_VULKAN_RUNNER_ENABLED) LIST(APPEND targets_to_link "LLVM${t}") ENDFOREACH(t) - add_mlir_tool(mlir-vulkan-runner + add_llvm_tool(mlir-vulkan-runner mlir-vulkan-runner.cpp ) add_dependencies(mlir-vulkan-runner vulkan-runtime-wrappers) From 0db97234cf490e464c82f2191fef2d8a163106fb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 12:17:54 -0700 Subject: [PATCH 517/544] [X86] Remove usesCustomInserter from MWAITX_SAVE_EBX and MWAITX_SAVE_RBX. NFC These are now emitted by a CustomInserter rather than using a custom inserter themselves. --- llvm/lib/Target/X86/X86InstrCompiler.td | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 9e6a5fb934de4..32686659700d7 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -901,8 +901,7 @@ def LCMPXCHG16B_SAVE_RBX : // cf comment for LCMPXCHG8B_SAVE_EBX. let Defs = [ECX, EAX, EBX, EFLAGS], Uses = [ECX, EAX, EBX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], - isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst", - usesCustomInserter = 1 in { + isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst" in { def MWAITX_SAVE_EBX : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$ebx_input, GR32:$ebx_save), @@ -912,8 +911,7 @@ def MWAITX_SAVE_EBX : // Same as MWAITX_SAVE_EBX but for the case where RBX is the base pointer. let Defs = [ECX, EAX, EBX, EFLAGS], Uses = [ECX, EAX, EBX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], - isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst", - usesCustomInserter = 1 in { + isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in { def MWAITX_SAVE_RBX : I<0, Pseudo, (outs GR64:$dst), (ins GR32:$ebx_input, GR64:$rbx_save), From 952dfd76c6696207cc290c4f6f15d5dea5cca795 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 13:53:13 -0700 Subject: [PATCH 518/544] [X86] Correct the implicit defs/uses for the MWAITX pseudo instructions. MWAITX doesn't touch EFLAGS so no pseudos should def EFLAGS. The SAVE_EBX/RBX pseudos only needs to def the EBX register that the expansion overwrites. The EAX and ECX registers are only read. The pseudo emitted during isel that is used by the custom inserter shouldn't have any implicit defs or uses since everything is in vregs. --- llvm/lib/Target/X86/X86InstrCompiler.td | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 32686659700d7..0f78b7d35aa6f 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -899,7 +899,7 @@ def LCMPXCHG16B_SAVE_RBX : // This pseudo must be used when the frame uses RBX as // the base pointer. // cf comment for LCMPXCHG8B_SAVE_EBX. -let Defs = [ECX, EAX, EBX, EFLAGS], Uses = [ECX, EAX, EBX], +let Defs = [EBX], Uses = [ECX, EAX, EBX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst" in { def MWAITX_SAVE_EBX : @@ -909,7 +909,7 @@ def MWAITX_SAVE_EBX : []>; } // Same as MWAITX_SAVE_EBX but for the case where RBX is the base pointer. -let Defs = [ECX, EAX, EBX, EFLAGS], Uses = [ECX, EAX, EBX], +let Defs = [EBX], Uses = [ECX, EAX, EBX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in { def MWAITX_SAVE_RBX : @@ -920,13 +920,11 @@ def MWAITX_SAVE_RBX : } // Pseudo mwaitx instruction to use for custom insertion. -let Defs = [ECX, EAX, EBX, EFLAGS], Uses = [ECX, EAX, EBX], - Predicates = [HasMWAITX], SchedRW = [WriteSystem], +let Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { def MWAITX : - I<0, Pseudo, (outs), - (ins GR32:$ecx, GR32:$eax, GR32:$ebx), + I<0, Pseudo, (outs), (ins GR32:$ecx, GR32:$eax, GR32:$ebx), "mwaitx", [(int_x86_mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>; } From 1065f3439bad59323f16e7c8ee568c7d94dcd952 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 4 Oct 2020 15:34:41 -0700 Subject: [PATCH 519/544] [DomTree] findNearestCommonDominator: assert the nodes are in tree i.e. they cannot be unreachable from the entry (which usually indicate usage errors). This change allows the removal of some nullptr checks. Reviewed By: kuhar Differential Revision: https://reviews.llvm.org/D88758 --- llvm/include/llvm/Support/GenericDomTree.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index c77168432058a..4bed550f44c0f 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -463,8 +463,8 @@ class DominatorTreeBase { return this->Roots[0]; } - /// findNearestCommonDominator - Find nearest common dominator basic block - /// for basic block A and B. If there is no such block then return nullptr. + /// Find nearest common dominator basic block for basic block A and B. A and B + /// must have tree nodes. NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) const { assert(A && B && "Pointers are not valid"); assert(A->getParent() == B->getParent() && @@ -480,18 +480,18 @@ class DominatorTreeBase { DomTreeNodeBase *NodeA = getNode(A); DomTreeNodeBase *NodeB = getNode(B); - - if (!NodeA || !NodeB) return nullptr; + assert(NodeA && "A must be in the tree"); + assert(NodeB && "B must be in the tree"); // Use level information to go up the tree until the levels match. Then // continue going up til we arrive at the same node. - while (NodeA && NodeA != NodeB) { + while (NodeA != NodeB) { if (NodeA->getLevel() < NodeB->getLevel()) std::swap(NodeA, NodeB); NodeA = NodeA->IDom; } - return NodeA ? NodeA->getBlock() : nullptr; + return NodeA->getBlock(); } const NodeT *findNearestCommonDominator(const NodeT *A, From ea83e0b17ecf5dc0cf228afb334aa72ce9b5aec1 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 4 Oct 2020 15:42:03 -0700 Subject: [PATCH 520/544] llvm-dwarfdump: Dump address forms in their encoded length rather than always in 64 bits Few places did this already - refactor them all into a common helper. --- llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 3 +++ llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp | 5 +++-- llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp | 9 ++++++--- llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp | 5 +++-- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 2 +- llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp | 8 +++++++- llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir | 8 ++++---- .../DebugInfo/MIR/Hexagon/bundled-call-pr44001.mir | 2 +- llvm/test/DebugInfo/Mips/dbg-call-site-low-pc.ll | 2 +- llvm/test/DebugInfo/X86/debug-loc-offset.mir | 4 ++-- llvm/test/DebugInfo/X86/debug_addr.ll | 6 +++--- llvm/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s | 4 ++-- llvm/test/MC/ARM/dwarf-asm-nonstandard-section.s | 4 ++-- llvm/test/MC/ARM/dwarf-asm-single-section.s | 4 ++-- llvm/test/MC/MachO/gen-dwarf.s | 10 +++++----- llvm/test/MC/WebAssembly/debug-localvar.ll | 8 ++++---- llvm/test/MC/WebAssembly/dwarfdump.ll | 8 ++++---- llvm/test/tools/llvm-dwarfdump/X86/gnu_call_site.s | 6 +++--- llvm/test/tools/llvm-dwarfdump/X86/tombstone.s | 9 ++++----- 19 files changed, 60 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 3f1be4e5a5925..1342e645934cc 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -82,6 +82,9 @@ class DWARFFormValue { void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const; void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts, object::SectionedAddress SA) const; + void dumpAddress(raw_ostream &OS, uint64_t Address) const; + static void dumpAddress(raw_ostream &OS, uint8_t AddressSize, + uint64_t Address); static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS, DIDumpOptions DumpOpts, uint64_t SectionIndex); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp index ddf307de22213..25d2e852a7fea 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp @@ -18,8 +18,9 @@ void DWARFAddressRange::dump(raw_ostream &OS, uint32_t AddressSize, const DWARFObject *Obj) const { OS << (DumpOpts.DisplayRawContents ? " " : "["); - OS << format("0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, LowPC) - << format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2, HighPC); + DWARFFormValue::dumpAddress(OS, AddressSize, LowPC); + OS << ", "; + DWARFFormValue::dumpAddress(OS, AddressSize, HighPC); OS << (DumpOpts.DisplayRawContents ? "" : ")"); if (Obj) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp index 381dd476cd585..598e3ecee30ef 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -20,9 +21,11 @@ using namespace llvm; void DWARFDebugArangeSet::Descriptor::dump(raw_ostream &OS, uint32_t AddressSize) const { - OS << format("[0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, Address) - << format(" 0x%*.*" PRIx64 ")", AddressSize * 2, AddressSize * 2, - getEndAddress()); + OS << '['; + DWARFFormValue::dumpAddress(OS, AddressSize, Address); + OS << ", "; + DWARFFormValue::dumpAddress(OS, AddressSize, getEndAddress()); + OS << ')'; } void DWARFDebugArangeSet::clear() { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp index a8e7cdeeafbc1..cc806739e19e7 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp @@ -8,6 +8,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" @@ -201,7 +202,7 @@ void RangeListEntry::dump( CurrentBase = Value0; if (!DumpOpts.Verbose) return; - OS << format(" 0x%*.*" PRIx64, AddrSize * 2, AddrSize * 2, Value0); + DWARFFormValue::dumpAddress(OS << ' ', AddrSize, Value0); break; } case dwarf::DW_RLE_base_address: @@ -209,7 +210,7 @@ void RangeListEntry::dump( CurrentBase = Value0; if (!DumpOpts.Verbose) return; - OS << format(" 0x%*.*" PRIx64, AddrSize * 2, AddrSize * 2, Value0); + DWARFFormValue::dumpAddress(OS << ' ', AddrSize, Value0); break; case dwarf::DW_RLE_start_length: PrintRawEntry(OS, *this, AddrSize, DumpOpts); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 04161e09d3e20..f07f4e362568f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -284,7 +284,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, // Print the actual address rather than the offset. uint64_t LowPC, HighPC, Index; if (Die.getLowAndHighPC(LowPC, HighPC, Index)) - OS << format("0x%016" PRIx64, HighPC); + DWARFFormValue::dumpAddress(OS, U->getAddressByteSize(), HighPC); else FormValue.dump(OS, DumpOpts); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index a7da5acc380b5..7a84605211fb3 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -358,10 +358,16 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, return !errorToBool(std::move(Err)); } +void DWARFFormValue::dumpAddress(raw_ostream &OS, uint8_t AddressSize, + uint64_t Address) { + uint8_t HexDigits = AddressSize * 2; + OS << format("0x%*.*" PRIx64, HexDigits, HexDigits, Address); +} + void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts, object::SectionedAddress SA) const { - OS << format("0x%016" PRIx64, SA.Address); + dumpAddress(OS, U->getAddressByteSize(), SA.Address); dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts, SA.SectionIndex); } diff --git a/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir b/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir index fb201ab523aad..80f34a9f4f37f 100644 --- a/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir +++ b/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir @@ -13,12 +13,12 @@ # CHECK: DW_AT_stmt_list (0x00000000) # CHECK: DW_AT_comp_dir ("/") # CHECK: DW_AT_APPLE_optimized (true) -# CHECK: DW_AT_low_pc (0x0000000000000000) -# CHECK: DW_AT_high_pc (0x0000000000000008) +# CHECK: DW_AT_low_pc (0x00000000) +# CHECK: DW_AT_high_pc (0x00000008) # CHECK: DW_TAG_subprogram -# CHECK: DW_AT_low_pc (0x0000000000000000) -# CHECK: DW_AT_high_pc (0x0000000000000008) +# CHECK: DW_AT_low_pc (0x00000000) +# CHECK: DW_AT_high_pc (0x00000008) # CHECK: DW_AT_APPLE_omit_frame_ptr (true) # CHECK: DW_AT_frame_base (DW_OP_reg13 SP) # CHECK: DW_AT_name ("f") diff --git a/llvm/test/DebugInfo/MIR/Hexagon/bundled-call-pr44001.mir b/llvm/test/DebugInfo/MIR/Hexagon/bundled-call-pr44001.mir index 7f362cd550eb5..1b2776775fc50 100644 --- a/llvm/test/DebugInfo/MIR/Hexagon/bundled-call-pr44001.mir +++ b/llvm/test/DebugInfo/MIR/Hexagon/bundled-call-pr44001.mir @@ -4,7 +4,7 @@ # CHECK-LABEL: DW_TAG_GNU_call_site # CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x[[BAR_ADDR:[0-9a-f]+]] => {0x{{0*}}[[BAR_ADDR]]} "bar") -# CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000008 ".text") +# CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x00000008 ".text") --- | ; ModuleID = 'bundled-call-pr44001.ll' diff --git a/llvm/test/DebugInfo/Mips/dbg-call-site-low-pc.ll b/llvm/test/DebugInfo/Mips/dbg-call-site-low-pc.ll index 46b1bc6b54ad2..abe3ad640f670 100644 --- a/llvm/test/DebugInfo/Mips/dbg-call-site-low-pc.ll +++ b/llvm/test/DebugInfo/Mips/dbg-call-site-low-pc.ll @@ -22,7 +22,7 @@ ;; Test mips, mipsel, mips64, mips64el: ; CHECK: DW_TAG_GNU_call_site ; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "f1" -; CHECK-NEXT: DW_AT_low_pc (0x0000000000000010) +; CHECK-NEXT: DW_AT_low_pc (0x{{(00000000)?}}00000010) ; ModuleID = 'm.c' source_filename = "m.c" diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.mir b/llvm/test/DebugInfo/X86/debug-loc-offset.mir index a6eef5b19625e..f9e74d2bed8c0 100644 --- a/llvm/test/DebugInfo/X86/debug-loc-offset.mir +++ b/llvm/test/DebugInfo/X86/debug-loc-offset.mir @@ -32,7 +32,7 @@ # Checking that we have two compile units with two sets of high/lo_pc. # CHECK: .debug_info contents # CHECK: DW_TAG_compile_unit -# CHECK: DW_AT_low_pc {{.*}} (0x0000000000000020 ".text") +# CHECK: DW_AT_low_pc {{.*}} (0x00000020 ".text") # CHECK: DW_AT_high_pc # # CHECK: DW_TAG_subprogram @@ -51,7 +51,7 @@ # CHECK-NOT: DW_AT_location # # CHECK: DW_TAG_compile_unit -# CHECK: DW_AT_low_pc {{.*}} (0x0000000000000000 ".text") +# CHECK: DW_AT_low_pc {{.*}} (0x00000000 ".text") # CHECK: DW_AT_high_pc # # CHECK: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/X86/debug_addr.ll b/llvm/test/DebugInfo/X86/debug_addr.ll index 1f56e5880167f..6087f452c1c48 100644 --- a/llvm/test/DebugInfo/X86/debug_addr.ll +++ b/llvm/test/DebugInfo/X86/debug_addr.ll @@ -20,7 +20,7 @@ ; DWARF4: DW_AT_GNU_dwo_name{{.*}}test.dwo ; DWARF4: DW_AT_GNU_addr_base{{.*}}0x00000000 ; DWARF4: DW_TAG_GNU_call_site -; DWARF4: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000002) address = 0x0000000000000018 ".text") +; DWARF4: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000002) address = 0x00000018 ".text") ; DWARF4: .debug_addr contents: ; DWARF4-NEXT: Addrs: [ ; DWARF4-NEXT: 0x00000000 @@ -35,8 +35,8 @@ ; DWARF5-NOT: DW_TAG_{{.*}} ; DWARF5: DW_AT_dwo_name{{.*}}test.dwo ; DWARF5: DW_AT_addr_base{{.*}}0x00000008 -; DWARF5: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000000000 ".text") -; DWARF5: DW_AT_call_return_pc [DW_FORM_addrx] (indexed (00000002) address = 0x0000000000000018 ".text") +; DWARF5: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x00000000 ".text") +; DWARF5: DW_AT_call_return_pc [DW_FORM_addrx] (indexed (00000002) address = 0x00000018 ".text") ; DWARF5: .debug_addr contents: ; DWARF5-NEXT: 0x00000000: Address table header: length = 0x00000010, format = DWARF32, version = 0x0005, addr_size = 0x04, seg_size = 0x00 ; DWARF5-NEXT: Addrs: [ diff --git a/llvm/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s b/llvm/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s index d73a325a043d6..b0ac916091439 100644 --- a/llvm/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s +++ b/llvm/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s @@ -26,8 +26,8 @@ b: // DWARF: .debug_info contents: // DWARF: DW_TAG_compile_unit // DWARF-NOT: DW_TAG_ -// DWARF: DW_AT_low_pc {{.*}}(0x0000000000000000) -// DWARF: DW_AT_high_pc {{.*}}(0x0000000000000004) +// DWARF: DW_AT_low_pc {{.*}}(0x00000000) +// DWARF: DW_AT_high_pc {{.*}}(0x00000004) // DWARF: DW_TAG_label // DWARF-NEXT: DW_AT_name {{.*}}("a") diff --git a/llvm/test/MC/ARM/dwarf-asm-nonstandard-section.s b/llvm/test/MC/ARM/dwarf-asm-nonstandard-section.s index 04fed305040d7..9d1abbf05d7eb 100644 --- a/llvm/test/MC/ARM/dwarf-asm-nonstandard-section.s +++ b/llvm/test/MC/ARM/dwarf-asm-nonstandard-section.s @@ -20,8 +20,8 @@ b: // DWARF: .debug_info contents: // DWARF: DW_TAG_compile_unit // DWARF-NOT: DW_TAG_ -// DWARF: DW_AT_low_pc (0x0000000000000000) -// DWARF: DW_AT_high_pc (0x0000000000000004) +// DWARF: DW_AT_low_pc (0x00000000) +// DWARF: DW_AT_high_pc (0x00000004) // DWARF: DW_TAG_label // DWARF-NEXT: DW_AT_name ("b") diff --git a/llvm/test/MC/ARM/dwarf-asm-single-section.s b/llvm/test/MC/ARM/dwarf-asm-single-section.s index 9ee80a2770a92..781319ffaa002 100644 --- a/llvm/test/MC/ARM/dwarf-asm-single-section.s +++ b/llvm/test/MC/ARM/dwarf-asm-single-section.s @@ -21,8 +21,8 @@ a: // DWARF: .debug_info contents: // DWARF: DW_TAG_compile_unit // DWARF-NOT: DW_TAG_ -// DWARF: DW_AT_low_pc (0x0000000000000000) -// DWARF: DW_AT_high_pc (0x0000000000000004) +// DWARF: DW_AT_low_pc (0x00000000) +// DWARF: DW_AT_high_pc (0x00000004) // DWARF: DW_TAG_label // DWARF-NEXT: DW_AT_name ("a") diff --git a/llvm/test/MC/MachO/gen-dwarf.s b/llvm/test/MC/MachO/gen-dwarf.s index 62dd18a59acbc..a4b979aa1bd5d 100644 --- a/llvm/test/MC/MachO/gen-dwarf.s +++ b/llvm/test/MC/MachO/gen-dwarf.s @@ -37,8 +37,8 @@ _x: .long 1 // We don't check the leading addresses these are at. // CHECK: DW_TAG_compile_unit // CHECK: DW_AT_stmt_list (0x00000000) -// CHECK: DW_AT_low_pc (0x0000000000000000) -// CHECK: DW_AT_high_pc (0x0000000000000008) +// CHECK: DW_AT_low_pc (0x00000000) +// CHECK: DW_AT_high_pc (0x00000008) // We don't check the file name as it is a temp directory // CHECK: DW_AT_name // We don't check the DW_AT_comp_dir which is the current working directory @@ -49,19 +49,19 @@ _x: .long 1 // CHECK: DW_AT_name ("bar") // CHECK: DW_AT_decl_file ([[FILE:".*gen-dwarf.s"]]) // CHECK: DW_AT_decl_line (5) -// CHECK: DW_AT_low_pc (0x0000000000000000) +// CHECK: DW_AT_low_pc (0x00000000) // CHECK: DW_TAG_label // CHECK: DW_AT_name ("foo") // CHECK: DW_AT_decl_file ([[FILE]]) // CHECK: DW_AT_decl_line (9) -// CHECK: DW_AT_low_pc (0x0000000000000007) +// CHECK: DW_AT_low_pc (0x00000007) // CHECK: DW_TAG_label // CHECK: DW_AT_name ("baz") // CHECK: DW_AT_decl_file ([[FILE]]) // CHECK: DW_AT_decl_line (10) -// CHECK: DW_AT_low_pc (0x0000000000000007) +// CHECK: DW_AT_low_pc (0x00000007) // CHECK: NULL diff --git a/llvm/test/MC/WebAssembly/debug-localvar.ll b/llvm/test/MC/WebAssembly/debug-localvar.ll index ffb04e4387d04..46dd42ea97a8a 100644 --- a/llvm/test/MC/WebAssembly/debug-localvar.ll +++ b/llvm/test/MC/WebAssembly/debug-localvar.ll @@ -78,8 +78,8 @@ attributes #2 = { nounwind } ; CHECK-LABEL: DW_TAG_compile_unit ; CHECK-LABEL: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc (0x0000000000000002) -; CHECK-NEXT: DW_AT_high_pc (0x0000000000000039) +; CHECK-NEXT: DW_AT_low_pc (0x00000002) +; CHECK-NEXT: DW_AT_high_pc (0x00000039) ; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x0 0x1, DW_OP_stack_value) ; CHECK-NEXT: DW_AT_name ("foo") ; CHECK-NEXT: DW_AT_decl_file ("/s/llvm-upstream{{(/|\\)}}debugtest.c") @@ -102,8 +102,8 @@ attributes #2 = { nounwind } ; CHECK-NEXT: DW_AT_type (0x00000073 "int") ; CHECK-LABEL: DW_TAG_lexical_block -; CHECK-NEXT: DW_AT_low_pc (0x000000000000001c) -; CHECK-NEXT: DW_AT_high_pc (0x000000000000002d) +; CHECK-NEXT: DW_AT_low_pc (0x0000001c) +; CHECK-NEXT: DW_AT_high_pc (0x0000002d) ; CHECK-LABEL: DW_TAG_variable ; CHECK-NEXT: DW_AT_location (DW_OP_fbreg +4) diff --git a/llvm/test/MC/WebAssembly/dwarfdump.ll b/llvm/test/MC/WebAssembly/dwarfdump.ll index e6b3f15ba4144..6a53cdd724a26 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: DW_AT_stmt_list (0x00000000) ; CHECK-NEXT: DW_AT_comp_dir ("/usr/local/google/home/sbc/dev/wasm/simple") ; CHECK-NEXT: DW_AT_GNU_pubnames (true) -; CHECK-NEXT: DW_AT_low_pc (0x0000000000000002) -; CHECK-NEXT: DW_AT_high_pc (0x0000000000000004) +; CHECK-NEXT: DW_AT_low_pc (0x00000002) +; CHECK-NEXT: DW_AT_high_pc (0x00000004) ; CHECK: 0x00000026: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("foo") @@ -44,8 +44,8 @@ ; CHECK-NEXT: DW_AT_prototyped (true) ; CHECK: 0x0000005a: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc (0x0000000000000002) -; CHECK-NEXT: DW_AT_high_pc (0x0000000000000004) +; CHECK-NEXT: DW_AT_low_pc (0x00000002) +; CHECK-NEXT: DW_AT_high_pc (0x00000004) ; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) ; CHECK-NEXT: DW_AT_name ("f2") ; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") diff --git a/llvm/test/tools/llvm-dwarfdump/X86/gnu_call_site.s b/llvm/test/tools/llvm-dwarfdump/X86/gnu_call_site.s index 8a0b1f5e3a875..d4ae1d96c1fdf 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/gnu_call_site.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/gnu_call_site.s @@ -5,13 +5,13 @@ # CHECK-NEXT: DW_AT_external (true) # CHECK-NEXT: DW_AT_name ("fn4") # CHECK-NEXT: DW_AT_linkage_name ("test") -# CHECK-NEXT: DW_AT_low_pc (0x0000000000000000) -# CHECK-NEXT: DW_AT_high_pc (0x0000000000000000) +# CHECK-NEXT: DW_AT_low_pc (0x00000000) +# CHECK-NEXT: DW_AT_high_pc (0x00000000) # CHECK-NEXT: DW_AT_frame_base (DW_OP_call_frame_cfa) # CHECK-NEXT: DW_AT_GNU_all_call_sites (true) # CHECK: DW_TAG_GNU_call_site -# CHECK-NEXT: DW_AT_low_pc (0x0000000000000000) +# CHECK-NEXT: DW_AT_low_pc (0x00000000) # CHECK-NEXT: DW_AT_abstract_origin (0x00000021 "test") # CHECK: DW_TAG_GNU_call_site_parameter diff --git a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s index 85a88fc54d894..e8627f31e9f81 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s @@ -13,11 +13,10 @@ # CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000 # CHECK-NEXT: [0x00000042, 0x00000048)) # CHECK: DW_TAG_subprogram -# FIXME: Print address using unit's address size. -# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x00000000ffffffff (dead code)) +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0xffffffff (dead code)) # CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) # CHECK: DW_TAG_subprogram -# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000042) +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x00000042) # CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) # CHECK: DW_TAG_compile_unit # CHECK: DW_AT_addr_base @@ -28,10 +27,10 @@ # CHECK-NEXT: [0x00000042, 0x00000048) # CHECK-NEXT: [0x00000042, 0x00000048)) # CHECK: DW_TAG_subprogram -# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x00000000ffffffff (dead code)) +# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0xffffffff (dead code)) # CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) # CHECK: DW_TAG_subprogram -# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000001) address = 0x0000000000000042) +# CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000001) address = 0x00000042) # CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000006) # CHECK: DW_TAG_compile_unit # CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000018 From 83cc498c38d2e4baaf3a233ae73fc49e24ac8898 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Fri, 2 Oct 2020 17:13:27 -0700 Subject: [PATCH 521/544] [NFCI] Remove unnecessary trailing undef in RuntimeLibcalls.def All uses of the file undef the macro already. --- llvm/include/llvm/IR/RuntimeLibcalls.def | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 903db6c704987..75eef02ec70e5 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -555,4 +555,3 @@ HANDLE_LIBCALL(RETURN_ADDRESS, nullptr) HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr) -#undef HANDLE_LIBCALL From 2c94d88e076990a7b533578a392a150d4b9b0fa8 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Fri, 2 Oct 2020 17:16:22 -0700 Subject: [PATCH 522/544] [NewPM] collapsing nested pass mangers of the same type This is one of the reason for extra invalidations in D84959. In practice, I don't think we have use cases needing this. This simplifies the pipeline a bit and prune corner cases when considering invalidations. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D85676 --- .../test/CodeGen/thinlto-distributed-newpm.ll | 4 ---- llvm/include/llvm/IR/PassManager.h | 16 ++++++++++++++- llvm/test/Other/new-pass-manager.ll | 4 ---- llvm/test/Other/new-pm-defaults.ll | 6 ------ llvm/test/Other/new-pm-lto-defaults.ll | 2 -- llvm/test/Other/new-pm-thinlto-defaults.ll | 6 ------ .../new-pm-thinlto-postlink-pgo-defaults.ll | 6 ------ ...-pm-thinlto-postlink-samplepgo-defaults.ll | 6 ------ .../new-pm-thinlto-prelink-pgo-defaults.ll | 20 ------------------- ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 4 ---- llvm/test/Other/pass-pipeline-parsing.ll | 6 ------ 11 files changed, 15 insertions(+), 65 deletions(-) diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll index 9f9a8bec4ef5d..ec56845a8fdf0 100644 --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -25,7 +25,6 @@ ; CHECK-O: Running pass: LowerTypeTestsPass ; CHECK-O: Invalidating analysis: InnerAnalysisManagerProxy ; CHECK-O: Running pass: ForceFunctionAttrsPass -; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O: Running pass: PGOIndirectCallPromotion ; CHECK-O: Running analysis: ProfileSummaryAnalysis ; CHECK-O: Running analysis: InnerAnalysisManagerProxy @@ -151,8 +150,6 @@ ; CHECK-O: Invalidating analysis: DemandedBitsAnalysis on main ; CHECK-O: Invalidating analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Invalidating analysis: CallGraphAnalysis -; CHECK-O: Finished {{.*}}Module pass manager run. -; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O: Running pass: GlobalOptPass ; CHECK-O: Running pass: GlobalDCEPass ; CHECK-O: Running pass: EliminateAvailableExternallyPass @@ -207,7 +204,6 @@ ; CHECK-O: Running pass: GlobalDCEPass ; CHECK-O: Running pass: ConstantMergePass ; CHECK-O: Finished {{.*}}Module pass manager run. -; CHECK-O: Finished {{.*}}Module pass manager run. target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 6b4f8e3140ee6..44f8900f2ebf5 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -548,7 +548,9 @@ class PassManager : public PassInfoMixin< return PA; } - template void addPass(PassT Pass) { + template + std::enable_if_t::value> + addPass(PassT Pass) { using PassModelT = detail::PassModel; @@ -556,6 +558,18 @@ class PassManager : public PassInfoMixin< Passes.emplace_back(new PassModelT(std::move(Pass))); } + /// When adding a pass manager pass that has the same type as this pass + /// manager, simply move the passes over. This is because we don't have use + /// cases rely on executing nested pass managers. Doing this could reduce + /// implementation complexity and avoid potential invalidation issues that may + /// happen with nested pass managers of the same type. + template + std::enable_if_t::value> + addPass(PassT &&Pass) { + for (auto &P : Pass.Passes) + Passes.emplace_back(std::move(P)); + } + static bool isRequired() { return true; } protected: diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll index 31be3adb68978..70d1f7152120a 100644 --- a/llvm/test/Other/new-pass-manager.ll +++ b/llvm/test/Other/new-pass-manager.ll @@ -207,7 +207,6 @@ ; CHECK-INVALIDATE-ALL: Starting llvm::Module pass manager run ; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL: Running analysis: NoOpModuleAnalysis -; CHECK-INVALIDATE-ALL: Starting llvm::Module pass manager run ; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-NOT: Running analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL: Starting llvm::Function pass manager run @@ -221,7 +220,6 @@ ; CHECK-INVALIDATE-ALL: Invalidating analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL: Running analysis: NoOpModuleAnalysis -; CHECK-INVALIDATE-ALL: Finished llvm::Module pass manager run ; CHECK-INVALIDATE-ALL-NOT: Invalidating analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-NOT: Running analysis: NoOpModuleAnalysis @@ -233,7 +231,6 @@ ; CHECK-INVALIDATE-ALL-CG: Starting llvm::Module pass manager run ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpModuleAnalysis -; CHECK-INVALIDATE-ALL-CG: Starting llvm::Module pass manager run ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG-NOT: Running analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL-CG: Starting CGSCC pass manager run @@ -256,7 +253,6 @@ ; CHECK-INVALIDATE-ALL-CG: Invalidating analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpModuleAnalysis -; CHECK-INVALIDATE-ALL-CG: Finished llvm::Module pass manager run ; CHECK-INVALIDATE-ALL-CG-NOT: Invalidating analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG-NOT: Running analysis: NoOpModuleAnalysis diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 59c24acb17f04..223034312faeb 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -89,10 +89,8 @@ ; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST --check-prefix=CHECK-O23SZ ; CHECK-O: Starting llvm::Module pass manager run. -; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis @@ -220,8 +218,6 @@ ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. -; CHECK-O-NEXT: Finished llvm::Module pass manager run. -; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O2-LTO-NOT: Running pass: EliminateAvailableExternallyPass @@ -271,8 +267,6 @@ ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass -; CHECK-O-NEXT: Finished llvm::Module pass manager run. -; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: PrintModulePass ; ; Make sure we get the IR back out without changes when we print the module. diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index a3be19ca29f1f..fbe6c22283cba 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -24,7 +24,6 @@ ; RUN: --check-prefix=CHECK-O3 --check-prefix=CHECK-EP-Peephole ; CHECK-O: Starting llvm::Module pass manager run. -; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass @@ -102,7 +101,6 @@ ; CHECK-O2-NEXT: Running pass: SimplifyCFGPass ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O2-NEXT: Running pass: GlobalDCEPass -; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: PrintModulePass ; Make sure we get the IR back out without changes when we print the module. diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 0b9b52a57e2a5..07164aafdae45 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -48,12 +48,10 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 ; ; CHECK-O: Starting llvm::Module pass manager run. -; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass ; CHECK-DIS-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-DIS-NEXT: Running pass: AddDiscriminatorsPass -; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-POSTLINK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-POSTLINK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy @@ -190,9 +188,7 @@ ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. -; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass -; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-POSTLINK-O-NEXT: Running pass: EliminateAvailableExternallyPass @@ -240,8 +236,6 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: CGProfilePass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass -; CHECK-POSTLINK-O-NEXT: Finished llvm::Module pass manager run. -; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-PRELINK-O-NEXT: Running pass: NameAnonGlobalPass ; CHECK-O-NEXT: Running pass: PrintModulePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 7efc5357253e8..0e287cc156b8b 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -21,10 +21,8 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext --dump-input=fail ; ; CHECK-O: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy @@ -162,8 +160,6 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass @@ -209,8 +205,6 @@ ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PrintModulePass ; Make sure we get the IR back out without changes when we print the module. diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 9c5e36c5886ac..da63b9580d767 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -26,10 +26,8 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext --dump-input=fail ; ; CHECK-O: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis @@ -173,8 +171,6 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass @@ -220,8 +216,6 @@ ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PrintModulePass ; Make sure we get the IR back out without changes when we print the module. diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index 45bb71a6d304e..11bd207781d86 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -28,10 +28,8 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O123 --dump-input=fail ; ; CHECK-O: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis @@ -215,27 +213,9 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O23SZ-NEXT: Clearing all analysis results for: -; CHECK-O23SZ-NEXT: Invalidating analysis: DominatorTreeAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: MemorySSAAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: LoopAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: PostDominatorTreeAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: BranchProbabilityAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: BlockFrequencyAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: ScalarEvolutionAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: InnerAnalysisManagerProxy -; CHECK-O23SZ-NEXT: Invalidating analysis: PhiValuesAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: MemoryDependenceAnalysis -; CHECK-O23SZ-NEXT: Invalidating analysis: DemandedBitsAnalysis -; CHECK-O3-NEXT: Invalidating analysis: DominanceFrontierAnalysis -; CHECK-O3-NEXT: Invalidating analysis: RegionInfoAnalysis -; CHECK-O23SZ-NEXT: Clearing all analysis results for: foo ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar ; CHECK-EXT: Running pass: {{.*}}::Bye -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O23SZ-NEXT: Clearing all analysis results for: foo ; CHECK-O-NEXT: Running pass: NameAnonGlobalPass ; CHECK-O-NEXT: Running pass: PrintModulePass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index d97fe18524db8..19a1fd551bf17 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -26,12 +26,10 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O123 --dump-input=fail ; ; CHECK-O: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: AddDiscriminatorsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. @@ -171,9 +169,7 @@ ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: NameAnonGlobalPass ; CHECK-O-NEXT: Running pass: PrintModulePass diff --git a/llvm/test/Other/pass-pipeline-parsing.ll b/llvm/test/Other/pass-pipeline-parsing.ll index eb1d07c01ab9e..adf7554ac503b 100644 --- a/llvm/test/Other/pass-pipeline-parsing.ll +++ b/llvm/test/Other/pass-pipeline-parsing.ll @@ -10,11 +10,9 @@ ; RUN: -passes='module(no-op-module,no-op-module)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-NESTED-TWO-NOOP-MP ; CHECK-NESTED-TWO-NOOP-MP: Starting llvm::Module pass manager run -; CHECK-NESTED-TWO-NOOP-MP: Starting llvm::Module pass manager run ; CHECK-NESTED-TWO-NOOP-MP: Running pass: NoOpModulePass ; CHECK-NESTED-TWO-NOOP-MP: Running pass: NoOpModulePass ; CHECK-NESTED-TWO-NOOP-MP: Finished llvm::Module pass manager run -; CHECK-NESTED-TWO-NOOP-MP: Finished llvm::Module pass manager run ; RUN: opt -disable-output -debug-pass-manager \ ; RUN: -passes=no-op-function,no-op-function %s 2>&1 \ @@ -112,7 +110,6 @@ ; RUN: -passes='module(function(no-op-function),cgscc(no-op-cgscc,function(no-op-function),no-op-cgscc),function(no-op-function))' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-NESTED-MP-CG-FP ; CHECK-NESTED-MP-CG-FP: Starting llvm::Module pass manager run -; CHECK-NESTED-MP-CG-FP: Starting llvm::Module pass manager run ; CHECK-NESTED-MP-CG-FP: Starting llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run @@ -127,7 +124,6 @@ ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Finished llvm::Module pass manager run -; CHECK-NESTED-MP-CG-FP: Finished llvm::Module pass manager run ; RUN: opt -disable-output -debug-pass-manager \ ; RUN: -passes='no-op-loop,no-op-loop' %s 2>&1 \ @@ -165,7 +161,6 @@ ; RUN: -passes='module(no-op-function,no-op-loop,no-op-cgscc,cgscc(no-op-function,no-op-loop),function(no-op-loop))' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-ADAPTORS ; CHECK-ADAPTORS: Starting llvm::Module pass manager run -; CHECK-ADAPTORS: Starting llvm::Module pass manager run ; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> ; CHECK-ADAPTORS: Running pass: NoOpFunctionPass ; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> @@ -187,7 +182,6 @@ ; CHECK-ADAPTORS: Running pass: NoOpLoopPass on Loop at depth 1 containing: %loop ; CHECK-ADAPTORS: Finished llvm::Function pass manager run ; CHECK-ADAPTORS: Finished llvm::Module pass manager run -; CHECK-ADAPTORS: Finished llvm::Module pass manager run ; RUN: opt -disable-output -debug-pass-manager \ ; RUN: -passes='cgscc(print)' %s 2>&1 \ From 628a319475dceac307add6953df028ec372e7f4e Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 4 Oct 2020 16:01:05 -0700 Subject: [PATCH 523/544] llvm-dwarfdump: Print addresses in debug_line to the parsed address size --- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 7 +++++-- llvm/test/tools/llvm-dwarfdump/X86/tombstone.s | 5 ++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index ab3022955cdbd..bda41b1f34e9c 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -897,8 +897,11 @@ Error DWARFDebugLine::LineTable::parse( TableData.setAddressSize(ExtractorAddressSize); } - if (Cursor && Verbose) - *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address); + if (Cursor && Verbose) { + *OS << " ("; + DWARFFormValue::dumpAddress(*OS, OpcodeAddressSize, State.Row.Address.Address); + *OS << ')'; + } } break; diff --git a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s index e8627f31e9f81..cb2a3a9d798d8 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s @@ -64,12 +64,11 @@ # CHECK-DAG: .debug_line contents: # CHECK: Address Line # CHECK-NEXT: -------------- -# FIXME: Dump the address with a size-appropriate encoding -# CHECK-NEXT: DW_LNE_set_address (0x00000000ffffffff) +# CHECK-NEXT: DW_LNE_set_address (0xffffffff) # CHECK-NEXT: DW_LNS_copy # CHECK-NEXT: DW_LNS_advance_pc (1) # CHECK-NEXT: DW_LNE_end_sequence -# CHECK-NEXT: DW_LNE_set_address (0x0000000000000042) +# CHECK-NEXT: DW_LNE_set_address (0x00000042) # CHECK-NEXT: DW_LNS_copy # CHECK-NEXT: 0x0000000000000042 1 # CHECK-NEXT: DW_LNS_advance_pc (1) From 4b38ceb0ebd7ed5fe1d5cbaf981060227515fb6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 4 Oct 2020 16:25:35 -0700 Subject: [PATCH 524/544] [X86] Remove MWAITX_SAVE_EBX pseudo instruction. Always save/restore the full %rbx register even in gnux32. ebx/rbx only needs to be saved when 64-bit registers are supported anyway. It should be fine to save/restore the whole rbx register even in gnux32 where the base is technically just ebx. This matches what we do for cmpxchg16b where rbx is saved/restored regardless of gnux32. --- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 5 +---- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++---------- llvm/lib/Target/X86/X86InstrCompiler.td | 14 ++------------ .../CodeGen/X86/base-pointer-and-mwaitx.ll | 12 ++++++------ 4 files changed, 17 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index a07e165633bb6..7a593b8ff7093 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -442,7 +442,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MBB.erase(MBBI); return true; } - case X86::MWAITX_SAVE_EBX: case X86::MWAITX_SAVE_RBX: { // Perform the following transformation. // SaveRbx = pseudomwaitx InArg, SaveRbx @@ -458,9 +457,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr)); // Finally, restore the value of RBX. Register SaveRbx = MBBI->getOperand(2).getReg(); - unsigned BasePointer = Opcode == X86::MWAITX_SAVE_EBX ? X86::EBX : X86::RBX; - TII->copyPhysReg(MBB, MBBI, DL, BasePointer, SaveRbx, - /*SrcIsKill*/ true); + TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true); // Delete the pseudo. MBBI->eraseFromParent(); return true; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9b5412c945ff8..47aad8965e1c8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33793,7 +33793,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, Register BasePtr = TRI->getBaseRegister(); bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX); // If no need to save the base pointer, we generate MWAITXrrr, - // else we generate pseudo MWAITX_SAVE_RBX/EBX. + // else we generate pseudo MWAITX_SAVE_RBX. if (!IsRBX || !TRI->hasBasePointer(*MF)) { BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX) .addReg(MI.getOperand(0).getReg()); @@ -33812,17 +33812,15 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(MI.getOperand(0).getReg()); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX) .addReg(MI.getOperand(1).getReg()); - const TargetRegisterClass *RegClass = - BasePtr == X86::EBX ? &X86::GR32RegClass : &X86::GR64RegClass; - // Save RBX (or EBX) into a virtual register. - Register SaveRBX = MF->getRegInfo().createVirtualRegister(RegClass); + assert(Subtarget.is64Bit() && "Expected 64-bit mode!"); + // Save RBX into a virtual register. + Register SaveRBX = + MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX) - .addReg(BasePtr); + .addReg(X86::RBX); // Generate mwaitx pseudo. - unsigned Opcode = - BasePtr == X86::RBX ? X86::MWAITX_SAVE_RBX : X86::MWAITX_SAVE_EBX; - Register Dst = MF->getRegInfo().createVirtualRegister(RegClass); - BuildMI(*BB, MI, DL, TII->get(Opcode)) + Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); + BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX)) .addDef(Dst) // Destination tied in with SaveRBX. .addReg(MI.getOperand(2).getReg()) // input value of EBX. .addUse(SaveRBX); // Save of base pointer. diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 0f78b7d35aa6f..c20a2b88e1187 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -896,19 +896,9 @@ def LCMPXCHG16B_SAVE_RBX : GR64:$rbx_save))]>; } -// This pseudo must be used when the frame uses RBX as +// This pseudo must be used when the frame uses RBX/EBX as // the base pointer. -// cf comment for LCMPXCHG8B_SAVE_EBX. -let Defs = [EBX], Uses = [ECX, EAX, EBX], - Predicates = [HasMWAITX], SchedRW = [WriteSystem], - isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst" in { -def MWAITX_SAVE_EBX : - I<0, Pseudo, (outs GR32:$dst), - (ins GR32:$ebx_input, GR32:$ebx_save), - "mwaitx", - []>; -} -// Same as MWAITX_SAVE_EBX but for the case where RBX is the base pointer. +// cf comment for LCMPXCHG16B_SAVE_RBX. let Defs = [EBX], Uses = [ECX, EAX, EBX], Predicates = [HasMWAITX], SchedRW = [WriteSystem], isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in { diff --git a/llvm/test/CodeGen/X86/base-pointer-and-mwaitx.ll b/llvm/test/CodeGen/X86/base-pointer-and-mwaitx.ll index fead4c650c0af..55fd730375e22 100644 --- a/llvm/test/CodeGen/X86/base-pointer-and-mwaitx.ll +++ b/llvm/test/CodeGen/X86/base-pointer-and-mwaitx.ll @@ -42,12 +42,12 @@ entry: ; USE_BASE_32: movl %ecx, %eax ; USE_BASE_32: movl %edx, %ecx ; Save base pointer. -; USE_BASE_32: movl %ebx, [[SAVE_ebx:%e(di|si)]] +; USE_BASE_32: movq %rbx, [[SAVE_rbx:%r(di|si)]] ; Set mwaitx ebx argument. ; USE_BASE_32: movl %r8d, %ebx ; USE_BASE_32-NEXT: mwaitx ; Restore base pointer. -; USE_BASE_32-NEXT: movl [[SAVE_ebx]], %ebx +; USE_BASE_32-NEXT: movq [[SAVE_rbx]], %rbx ; Pass mwaitx 3 arguments in eax, ecx, ebx ; NO_BASE_64: movl %r8d, %ebx @@ -111,12 +111,12 @@ if.end: ; USE_BASE_32: movl %esi, %eax ; USE_BASE_32: movl %edi, %ecx ; Save base pointer. -; USE_BASE_32: movl %ebx, [[SAVE_ebx:%e(di|si)]] +; USE_BASE_32: movq %rbx, [[SAVE_rbx:%r(di|si)]] ; Set mwaitx ebx argument. ; USE_BASE_32: movl %edx, %ebx ; USE_BASE_32-NEXT: mwaitx ; Restore base pointer. -; USE_BASE_32-NEXT: movl [[SAVE_ebx]], %ebx +; USE_BASE_32-NEXT: movq [[SAVE_rbx]], %rbx ; Pass mwaitx 3 arguments in eax, ecx, ebx ; NO_BASE_64: movl %edx, %ebx @@ -179,12 +179,12 @@ if.end: ; USE_BASE_32: movl %esi, %eax ; USE_BASE_32: movl %edi, %ecx ; Save base pointer. -; USE_BASE_32: movl %ebx, [[SAVE_ebx:%e(di|si)]] +; USE_BASE_32: movq %rbx, [[SAVE_rbx:%r(di|si)]] ; Set mwaitx ebx argument. ; USE_BASE_32: movl %edx, %ebx ; USE_BASE_32-NEXT: mwaitx ; Restore base pointer. -; USE_BASE_32-NEXT: movl [[SAVE_ebx]], %ebx +; USE_BASE_32-NEXT: movq [[SAVE_rbx]], %rbx ; Pass mwaitx 3 arguments in eax, ecx, ebx ; NO_BASE_64: movl %edx, %ebx From 92c45e4ee2511399484e8af26b66ba37ad0ed8e7 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 4 Oct 2020 17:50:24 -0700 Subject: [PATCH 525/544] llvm-dwarfdump: Add support for DW_RLE_startx_endx --- .../DebugInfo/DWARF/DWARFDebugRnglists.cpp | 33 ++++++++++++++---- .../tools/llvm-dwarfdump/X86/debug_rnglists.s | 22 ++++++++---- .../test/tools/llvm-dwarfdump/X86/tombstone.s | 34 +++++++++++-------- 3 files changed, 62 insertions(+), 27 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp index cc806739e19e7..d12acca1962e6 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp @@ -37,12 +37,9 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t *OffsetPtr) { break; } case dwarf::DW_RLE_startx_endx: - consumeError(C.takeError()); - return createStringError( - errc::not_supported, - "unsupported rnglists encoding DW_RLE_startx_endx at " - "offset 0x%" PRIx64, - Offset); + Value0 = Data.getULEB128(C); + Value1 = Data.getULEB128(C); + break; case dwarf::DW_RLE_startx_length: { Value0 = Data.getULEB128(C); Value1 = Data.getULEB128(C); @@ -150,6 +147,19 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( E.HighPC = E.LowPC + RLE.Value1; break; } + case dwarf::DW_RLE_startx_endx: { + auto Start = LookupPooledAddress(RLE.Value0); + if (!Start) + Start = {0, -1ULL}; + auto End = LookupPooledAddress(RLE.Value1); + if (!End) + End = {0, -1ULL}; + // FIXME: Some error handling if Start.SectionIndex != End.SectionIndex + E.SectionIndex = Start->SectionIndex; + E.LowPC = Start->Address; + E.HighPC = End->Address; + break; + } default: // Unsupported encodings should have been reported during extraction, // so we should not run into any here. @@ -235,6 +245,17 @@ void RangeListEntry::dump( DWARFAddressRange(Start, Start + Value1).dump(OS, AddrSize, DumpOpts); break; } + case dwarf::DW_RLE_startx_endx: { + PrintRawEntry(OS, *this, AddrSize, DumpOpts); + uint64_t Start = 0; + if (auto SA = LookupPooledAddress(Value0)) + Start = SA->Address; + uint64_t End = 0; + if (auto SA = LookupPooledAddress(Value1)) + End = SA->Address; + DWARFAddressRange(Start, End).dump(OS, AddrSize, DumpOpts); + break; + } default: llvm_unreachable("Unsupported range list encoding"); } diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_rnglists.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_rnglists.s index c9c94637b4d3d..07c9fe6d0f36e 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_rnglists.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_rnglists.s @@ -1,8 +1,8 @@ # RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o -# RUN: not llvm-dwarfdump --debug-rnglists %t.o 2> %t.err | FileCheck %s --check-prefixes=TERSE,BOTH -# RUN: FileCheck %s --input-file %t.err --check-prefix=ERR -# RUN: not llvm-dwarfdump -v --debug-rnglists %t.o 2> %t.err | FileCheck %s --check-prefixes=VERBOSE,BOTH -# RUN: FileCheck %s --input-file %t.err --check-prefix=ERR +# RUN: llvm-dwarfdump --debug-rnglists %t.o 2> %t.err | FileCheck %s --check-prefixes=TERSE,BOTH +# RUN: FileCheck %s --allow-empty --input-file %t.err --check-prefix=ERR +# RUN: llvm-dwarfdump -v --debug-rnglists %t.o 2> %t.err | FileCheck %s --check-prefixes=VERBOSE,BOTH +# RUN: FileCheck %s --allow-empty --input-file %t.err --check-prefix=ERR # BOTH: .debug_rnglists contents: # TERSE-NEXT: range list header: length = 0x00000037, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 @@ -74,6 +74,18 @@ # VERBOSE-NEXT: 0x{{[0-9a-f]*}}: # VERBOSE-SAME: range list header: length = 0x0000000c, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 +# BOTH-NEXT: ranges: +# TERSE-NEXT: [0x0000000000000000, 0x0000000000000000) +# TERSE-NEXT: + +# VERBOSE-NEXT: 0x00000091: [DW_RLE_startx_endx]: 0x0000000000000001, 0x000000000000000a => [0x0000000000000000, 0x0000000000000000) +# VERBOSE-NEXT: 0x00000094: [DW_RLE_end_of_list] + +# TERSE-NEXT: range list header: length = 0x0000000c, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 + +# VERBOSE-NEXT: 0x{{[0-9a-f]*}}: +# VERBOSE-SAME: range list header: length = 0x0000000c, format = DWARF32, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 + # BOTH-NEXT: ranges: # TERSE-NEXT: [0x0000000000000000, 0x000000000000002a) # TERSE-NEXT: @@ -110,8 +122,6 @@ # BOTH-NOT: range list header: -# ERR-NOT: error: -# ERR: error: unsupported rnglists encoding DW_RLE_startx_endx at offset 0x91 # ERR-NOT: error: .section .debug_rnglists,"",@progbits diff --git a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s index cb2a3a9d798d8..3465d08bf261e 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/tombstone.s @@ -24,6 +24,7 @@ # CHECK-NEXT: [0x00000042, 0x00000048) # CHECK-NEXT: [0x00000042, 0x00000048) # CHECK-NEXT: [0x00000042, 0x00000048) +# CHECK-NEXT: [0x00000042, 0x00000042) # CHECK-NEXT: [0x00000042, 0x00000048) # CHECK-NEXT: [0x00000042, 0x00000048)) # CHECK: DW_TAG_subprogram @@ -48,10 +49,11 @@ # entire rnglists contribution (since there's no way to know where such a # contribution starts) - rather than assuming one starts at 0. -# CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000057) +# CHECK: DW_AT_ranges # [0x0000000000000042, 0x0000000000000048) # [0x0000000000000042, 0x0000000000000048) # [0x0000000000000042, 0x0000000000000048) +# [0x0000000000000042, 0x0000000000000042) # [0x0000000000000042, 0x0000000000000048) # [0x0000000000000042, 0x0000000000000048)) # CHECK: DW_TAG_subprogram @@ -127,6 +129,8 @@ # CHECK-NEXT: [DW_RLE_startx_length]: 0x00000001, 0x00000006 # CHECK-NEXT: [DW_RLE_start_end ]: [0xffffffff, 0xffffffff) # CHECK-NEXT: [DW_RLE_start_end ]: [0x00000042, 0x00000048) +# CHECK-NEXT: [DW_RLE_startx_endx ]: 0x00000000, 0x00000000 +# CHECK-NEXT: [DW_RLE_startx_endx ]: 0x00000001, 0x00000001 # CHECK-NEXT: [DW_RLE_base_address ]: 0x00000040 # CHECK-NEXT: [DW_RLE_offset_pair ]: 0x00000002, 0x00000008 => [0x00000042, 0x00000048) # CHECK-NEXT: [DW_RLE_base_address ]: 0xffffffff @@ -146,6 +150,8 @@ # CHECK-NEXT: [DW_RLE_startx_length]: 0x0000000000000001, 0x0000000000000006 # CHECK-NEXT: [DW_RLE_start_end ]: [0xffffffffffffffff, 0xffffffffffffffff) # CHECK-NEXT: [DW_RLE_start_end ]: [0x0000000000000042, 0x0000000000000048) +# CHECK-NEXT: [DW_RLE_startx_endx ]: 0x0000000000000000, 0x0000000000000000 +# CHECK-NEXT: [DW_RLE_startx_endx ]: 0x0000000000000001, 0x0000000000000001 # CHECK-NEXT: [DW_RLE_base_address ]: 0x0000000000000040 # CHECK-NEXT: [DW_RLE_offset_pair ]: 0x0000000000000002, 0x0000000000000008 => [0x0000000000000042, 0x0000000000000048) # CHECK-NEXT: [DW_RLE_base_address ]: 0xffffffffffffffff @@ -304,13 +310,12 @@ .byte 6 # DW_RLE_start_end .long 0x42 # start address .long 0x48 # length -# FIXME: RLE_startx_endx unsupported by llvm-dwarfdump -# .byte 2 # DW_RLE_startx_endx -# .uleb128 0 # start address -# .uleb128 0 # length -# .byte 2 # DW_RLE_startx_endx -# .uleb128 1 # start address -# .uleb128 1 # length + .byte 2 # DW_RLE_startx_endx + .uleb128 0 # start index + .uleb128 0 # end index + .byte 2 # DW_RLE_startx_endx + .uleb128 1 # start index + .uleb128 1 # end index .byte 5 # DW_RLE_base_address .long 0x40 # address .byte 4 # DW_RLE_offset_pair @@ -358,13 +363,12 @@ .byte 6 # DW_RLE_start_end .quad 0x42 # start address .quad 0x48 # length -# FIXME: RLE_startx_endx unsupported by llvm-dwarfdump -# .byte 2 # DW_RLE_startx_endx -# .uleb128 0 # start address -# .uleb128 0 # length -# .byte 2 # DW_RLE_startx_endx -# .uleb128 1 # start address -# .uleb128 1 # length + .byte 2 # DW_RLE_startx_endx + .uleb128 0 # start index + .uleb128 0 # end index + .byte 2 # DW_RLE_startx_endx + .uleb128 1 # start index + .uleb128 1 # end index .byte 5 # DW_RLE_base_address .quad 0x40 # address .byte 4 # DW_RLE_offset_pair From 64f7790e7d2309b5d38949921a256acf8068e659 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 30 Sep 2020 08:01:02 -0400 Subject: [PATCH 526/544] [HIP] Add option --gpu-instrument-lib= Add an option --gpu-instrument-lib= to allow users to specify an instrument device library. This is for supporting -finstrument in device code for debugging/profiling tools. Differential Revision: https://reviews.llvm.org/D88557 --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/HIP.cpp | 11 +++++++++++ .../Driver/Inputs/hip_multiple_inputs/instrument.bc | 0 clang/test/Driver/hip-device-libs.hip | 10 ++++++++++ 4 files changed, 24 insertions(+) create mode 100644 clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 672a833c9d4da..18a1234762536 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -672,6 +672,9 @@ defm gpu_allow_device_init : OptInFFlag<"gpu-allow-device-init", def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; +def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, + HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " + "__cyg_profile_func_enter and __cyg_profile_func_exit">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 07d72c073b4b6..f1044f316fc84 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -330,6 +330,17 @@ void HIPToolChain::addClangTargetOptions( RocmInstallation.addCommonBitcodeLibCC1Args( DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); + + // Add instrument lib. + auto InstLib = + DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); + if (InstLib.empty()) + return; + if (llvm::sys::fs::exists(InstLib)) { + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(InstLib)); + } else + getDriver().Diag(diag::err_drv_no_such_file) << InstLib; } } diff --git a/clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc b/clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 3dd798476e2ba..1ffaeda183900 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -105,6 +105,15 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL +// Test --gpu-instrument-lib +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: --gpu-instrument-lib=%S/Inputs/hip_multiple_inputs/instrument.bc \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST + +// ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}ocml.bc" @@ -118,3 +127,4 @@ // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc" +// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc" From fef0ebbc0b39167656bd11283e3084b000b309dd Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sun, 4 Oct 2020 21:27:29 -0400 Subject: [PATCH 527/544] Revert "[HIP] Add option --gpu-instrument-lib=" This reverts commit 64f7790e7d2309b5d38949921a256acf8068e659 due to regression in hip-device-libs.hip. --- clang/include/clang/Driver/Options.td | 3 --- clang/lib/Driver/ToolChains/HIP.cpp | 11 ----------- .../Driver/Inputs/hip_multiple_inputs/instrument.bc | 0 clang/test/Driver/hip-device-libs.hip | 10 ---------- 4 files changed, 24 deletions(-) delete mode 100644 clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 18a1234762536..672a833c9d4da 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -672,9 +672,6 @@ defm gpu_allow_device_init : OptInFFlag<"gpu-allow-device-init", def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; -def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, - HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " - "__cyg_profile_func_enter and __cyg_profile_func_exit">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index f1044f316fc84..07d72c073b4b6 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -330,17 +330,6 @@ void HIPToolChain::addClangTargetOptions( RocmInstallation.addCommonBitcodeLibCC1Args( DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); - - // Add instrument lib. - auto InstLib = - DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); - if (InstLib.empty()) - return; - if (llvm::sys::fs::exists(InstLib)) { - CC1Args.push_back("-mlink-builtin-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(InstLib)); - } else - getDriver().Diag(diag::err_drv_no_such_file) << InstLib; } } diff --git a/clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc b/clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 1ffaeda183900..3dd798476e2ba 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -105,15 +105,6 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL -// Test --gpu-instrument-lib -// RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: --cuda-gpu-arch=gfx900 \ -// RUN: --rocm-path=%S/Inputs/rocm \ -// RUN: --gpu-instrument-lib=%S/Inputs/hip_multiple_inputs/instrument.bc \ -// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST - -// ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}ocml.bc" @@ -127,4 +118,3 @@ // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc" -// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc" From 9756a402f297d0030689aaade3651785b7496649 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sun, 4 Oct 2020 21:32:35 -0400 Subject: [PATCH 528/544] Recommit "[HIP] Add option --gpu-instrument-lib=" recommit 64f7790e7d2309b5d38949921a256acf8068e659 after fixing hip-device-libs.hip. --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/HIP.cpp | 11 +++++++++++ .../Inputs/hip_multiple_inputs/instrument.bc | 0 clang/test/Driver/hip-device-libs.hip | 14 ++++++++++++-- 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 672a833c9d4da..18a1234762536 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -672,6 +672,9 @@ defm gpu_allow_device_init : OptInFFlag<"gpu-allow-device-init", def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; +def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, + HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " + "__cyg_profile_func_enter and __cyg_profile_func_exit">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 07d72c073b4b6..f1044f316fc84 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -330,6 +330,17 @@ void HIPToolChain::addClangTargetOptions( RocmInstallation.addCommonBitcodeLibCC1Args( DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); + + // Add instrument lib. + auto InstLib = + DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); + if (InstLib.empty()) + return; + if (llvm::sys::fs::exists(InstLib)) { + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(InstLib)); + } else + getDriver().Diag(diag::err_drv_no_such_file) << InstLib; } } diff --git a/clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc b/clang/test/Driver/Inputs/hip_multiple_inputs/instrument.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 3dd798476e2ba..c3e89d1a4fed0 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -92,7 +92,7 @@ // Test --hip-device-lib-path flag // RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: --cuda-gpu-arch=gfx803 -nogpuinc \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD @@ -101,10 +101,19 @@ // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %clang -### -target x86_64-linux-gnu \ -// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --cuda-gpu-arch=gfx900 -nogpuinc \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL +// Test --gpu-instrument-lib +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: --gpu-instrument-lib=%S/Inputs/hip_multiple_inputs/instrument.bc \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST + +// ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}ocml.bc" @@ -118,3 +127,4 @@ // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc" +// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc" From 5b551b79d3bba5a8a282bf5f72c7baaccf925870 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 2 Oct 2020 08:08:26 -0400 Subject: [PATCH 529/544] [HIP] Fix default output file for -E By convention the default output file for -E is "-" (stdout). This is expected by tools like ccache, which uses output of -E to determine if a file and its dependence has changed. Currently clang does not use stdout as default output file for -E for HIP, which causes ccache not working. This patch fixes that. Differential Revision: https://reviews.llvm.org/D88730 --- clang/lib/Driver/Driver.cpp | 14 +++++++- clang/test/Driver/hip-output-file-name.hip | 42 ++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 96798b3d0adbb..6f2a030290ed7 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4604,6 +4604,17 @@ static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue, return Args.MakeArgString(Filename.c_str()); } +static bool HasPreprocessOutput(const Action &JA) { + if (isa(JA)) + return true; + if (isa(JA) && isa(JA.getInputs()[0])) + return true; + if (isa(JA) && + HasPreprocessOutput(*(JA.getInputs()[0]))) + return true; + return false; +} + const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, const char *BaseInput, StringRef BoundArch, bool AtTopLevel, @@ -4629,8 +4640,9 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, } // Default to writing to stdout? - if (AtTopLevel && !CCGenDiagnostics && isa(JA)) + if (AtTopLevel && !CCGenDiagnostics && HasPreprocessOutput(JA)) { return "-"; + } // Is this the assembly listing for /FA? if (JA.getType() == types::TY_PP_Asm && diff --git a/clang/test/Driver/hip-output-file-name.hip b/clang/test/Driver/hip-output-file-name.hip index d57f7e87f89e1..b0b1a9d7ff3d2 100644 --- a/clang/test/Driver/hip-output-file-name.hip +++ b/clang/test/Driver/hip-output-file-name.hip @@ -7,3 +7,45 @@ // RUN: 2>&1 | FileCheck %s // CHECK: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-output-file-name.o" + +// Check -E default output is "-" (stdout). + +// RUN: %clang -### -E -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=DASH %s + +// RUN: %clang -### -E -save-temps -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=DASH %s + +// RUN: %clang -### -E --cuda-device-only -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=CLANG-DASH %s + +// RUN: %clang -### -E --cuda-host-only -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=CLANG-DASH %s + +// DASH: {{.*}}clang-offload-bundler{{.*}}"-outputs=-" +// CLANG-DASH: {{.*}}clang{{.*}}"-o" "-" + +// Check -E with -o. + +// RUN: %clang -### -E -o test.cui -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=OUT %s + +// RUN: %clang -### -E -o test.cui -save-temps -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=OUT %s + +// RUN: %clang -### -E -o test.cui --cuda-device-only -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=CLANG-OUT %s + +// RUN: %clang -### -E -o test.cui --cuda-host-only -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: 2>&1 | FileCheck -check-prefixes=CLANG-OUT %s + +// OUT: {{.*}}clang-offload-bundler{{.*}}"-outputs=test.cui" +// CLANG-OUT: {{.*}}clang{{.*}}"-o" "test.cui" From e372c1d7624e2402a5f91a640780fb32649922b6 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 29 Sep 2020 23:52:03 -0400 Subject: [PATCH 530/544] [HIP] Fix -fgpu-allow-device-init option The option needs to be passed to both host and device compilation. Differential Revision: https://reviews.llvm.org/D88550 --- clang/lib/Driver/ToolChains/Clang.cpp | 11 ++++++++--- clang/lib/Driver/ToolChains/HIP.cpp | 4 ---- clang/test/Driver/hip-options.hip | 5 +++++ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 272a498990122..f6eeb53964a7d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5476,9 +5476,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Forward -cl options to -cc1 RenderOpenCLOptions(Args, CmdArgs); - if (IsHIP && Args.hasFlag(options::OPT_fhip_new_launch_api, - options::OPT_fno_hip_new_launch_api, true)) - CmdArgs.push_back("-fhip-new-launch-api"); + if (IsHIP) { + if (Args.hasFlag(options::OPT_fhip_new_launch_api, + options::OPT_fno_hip_new_launch_api, true)) + CmdArgs.push_back("-fhip-new-launch-api"); + if (Args.hasFlag(options::OPT_fgpu_allow_device_init, + options::OPT_fno_gpu_allow_device_init, false)) + CmdArgs.push_back("-fgpu-allow-device-init"); + } if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) { CmdArgs.push_back( diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index f1044f316fc84..4d1e0f9f2fdfc 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -268,10 +268,6 @@ void HIPToolChain::addClangTargetOptions( CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr)); } - if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init, - options::OPT_fno_gpu_allow_device_init, false)) - CC1Args.push_back("-fgpu-allow-device-init"); - CC1Args.push_back("-fcuda-allow-variadic-functions"); // Default to "hidden" visibility, as object level linking will not be diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index a7a6e02a3c81c..fa7b019e57626 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -9,6 +9,11 @@ // CHECK: clang{{.*}}" "-cc1" {{.*}} "-fcuda-is-device" // CHECK-SAME: "--gpu-max-threads-per-block=1024" +// RUN: %clang -### -nogpuinc -nogpulib -fgpu-allow-device-init \ +// RUN: %s 2>&1 | FileCheck -check-prefix=DEVINIT %s +// DEVINIT: clang{{.*}}" "-cc1" {{.*}}"-fgpu-allow-device-init" +// DEVINIT: clang{{.*}}" "-cc1" {{.*}}"-fgpu-allow-device-init" + // RUN: %clang -### -x hip -target x86_64-pc-windows-msvc -fms-extensions \ // RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \ // RUN: FileCheck -check-prefix=MLLVM %s From 86649a83b1aa6a391c216db18580984fb28d0d07 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 29 Sep 2020 15:46:40 +0100 Subject: [PATCH 531/544] travis: report llvm revision for non-external builds The build log did not contain the llvm-project repository revision being built for `BUILD_EXTERNAL=0` builds. Add it to ease debugging of build failures. --- llvm-spirv/.travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm-spirv/.travis.yml b/llvm-spirv/.travis.yml index 99e4583b70638..0437d75db5933 100644 --- a/llvm-spirv/.travis.yml +++ b/llvm-spirv/.travis.yml @@ -89,6 +89,7 @@ script: mkdir llvm-spirv mv * llvm-spirv git clone https://github.com/llvm/llvm-project --depth 1 + git -C llvm-project log --oneline -1 mv llvm-spirv llvm-project/llvm-spirv fi - | From 2cf914a845e3f50ca19ea115f541b7b1558f1ec2 Mon Sep 17 00:00:00 2001 From: Alexey Sotkin Date: Thu, 1 Oct 2020 11:53:08 +0300 Subject: [PATCH 532/544] Handle @llvm.memset.* with non-constant arguments (#696) * Handle @llvm.memset.* with non-constant arguments There is no SPIR-V counterpart for @llvm.memset.* intrinsic. Cases with constant value and length arguments are emulated via "storing" a constant array to the destination. For other cases we wrap the intrinsic in @spirv.llvm_memset_* function and expand the intrinsic to a loop via expandMemSetAsLoop() from llvm/Transforms/Utils/LowerMemIntrinsics.h. During reverse translation from SPIR-V to LLVM IR we can detect @spirv.llvm_memset_* and replace it with @llvm.memset. Signed-off-by: Alexey Sotkin Co-authored-by: Sven van Haastregt --- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 34 ++++++++---- llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp | 58 +++++++++++++++++++- llvm-spirv/test/transcoding/llvm.memset.ll | 55 ++++++++++++++++++- 3 files changed, 134 insertions(+), 13 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 58a060b168a84..44da6ddb8a8a1 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -2786,21 +2786,33 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF) { auto IsKernel = isKernel(BF); auto Linkage = IsKernel ? GlobalValue::ExternalLinkage : transLinkageType(BF); FunctionType *FT = dyn_cast(transType(BF->getFunctionType())); - Function *F = cast( - mapValue(BF, Function::Create(FT, Linkage, BF->getName(), M))); + std::string FuncName = BF->getName(); + StringRef FuncNameRef(FuncName); + // Transform "@spirv.llvm_memset_p0i8_i32.volatile" to @llvm.memset.p0i8.i32 + // assuming llvm.memset is supported by the device compiler. If this + // assumption is not safe, we should have a command line option to control + // this behavior. + if (FuncNameRef.consume_front("spirv.")) { + FuncNameRef.consume_back(".volatile"); + FuncName = FuncNameRef.str(); + std::replace(FuncName.begin(), FuncName.end(), '_', '.'); + } + Function *F = M->getFunction(FuncName); + if (!F) + F = Function::Create(FT, Linkage, FuncName, M); + F = cast(mapValue(BF, F)); mapFunction(BF, F); + if (F->isIntrinsic()) + return F; + + F->setCallingConv(IsKernel ? CallingConv::SPIR_KERNEL + : CallingConv::SPIR_FUNC); if (BF->hasDecorate(DecorationReferencedIndirectlyINTEL)) F->addFnAttr("referenced-indirectly"); - - if (!F->isIntrinsic()) { - F->setCallingConv(IsKernel ? CallingConv::SPIR_KERNEL - : CallingConv::SPIR_FUNC); - if (isFuncNoUnwind()) - F->addFnAttr(Attribute::NoUnwind); - foreachFuncCtlMask(BF, - [&](Attribute::AttrKind Attr) { F->addFnAttr(Attr); }); - } + if (isFuncNoUnwind()) + F->addFnAttr(Attribute::NoUnwind); + foreachFuncCtlMask(BF, [&](Attribute::AttrKind Attr) { F->addFnAttr(Attr); }); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp index e12998c183d06..b63dc2ce21e58 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp @@ -46,6 +46,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" // expandMemSetAsLoop() #include #include @@ -75,6 +76,15 @@ class SPIRVRegularizeLLVM : public ModulePass { void lowerFuncPtr(Function *F, Op OC); void lowerFuncPtr(Module *M); + /// There is no SPIR-V counterpart for @llvm.memset.* intrinsic. Cases with + /// constant value and length arguments are emulated via "storing" a constant + /// array to the destination. For other cases we wrap the intrinsic in + /// @spirv.llvm_memset_* function and expand the intrinsic to a loop via + /// expandMemSetAsLoop() from llvm/Transforms/Utils/LowerMemIntrinsics.h + /// During reverse translation from SPIR-V to LLVM IR we can detect + /// @spirv.llvm_memset_* and replace it with @llvm.memset. + void lowerMemset(MemSetInst *MSI); + static char ID; private: @@ -84,6 +94,49 @@ class SPIRVRegularizeLLVM : public ModulePass { char SPIRVRegularizeLLVM::ID = 0; +void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) { + if (isa(MSI->getValue()) && isa(MSI->getLength())) + return; // To be handled in LLVMToSPIRV::transIntrinsicInst + Function *IntrinsicFunc = MSI->getCalledFunction(); + assert(IntrinsicFunc && "Missing function"); + std::string FuncName = IntrinsicFunc->getName().str(); + std::replace(FuncName.begin(), FuncName.end(), '.', '_'); + FuncName = "spirv." + FuncName; + if (MSI->isVolatile()) + FuncName += ".volatile"; + + // Redirect @llvm.memset.* call to @spirv.llvm_memset_* + Function *F = M->getFunction(FuncName); + if (F) { + // This function is already linked in. + MSI->setCalledFunction(F); + return; + } + // TODO copy arguments attributes: nocapture writeonly. + FunctionCallee FC = M->getOrInsertFunction(FuncName, MSI->getFunctionType()); + MSI->setCalledFunction(FC); + + F = dyn_cast(FC.getCallee()); + assert(F && "must be a function!"); + Argument *Dest = F->getArg(0); + Argument *Val = F->getArg(1); + Argument *Len = F->getArg(2); + Argument *IsVolatile = F->getArg(3); + Dest->setName("dest"); + Val->setName("val"); + Len->setName("len"); + IsVolatile->setName("isvolatile"); + IsVolatile->addAttr(Attribute::ImmArg); + BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", F); + IRBuilder<> IRB(EntryBB); + auto *MemSet = + IRB.CreateMemSet(Dest, Val, Len, MSI->getDestAlign(), MSI->isVolatile()); + IRB.CreateRetVoid(); + expandMemSetAsLoop(cast(MemSet)); + MemSet->eraseFromParent(); + return; +} + bool SPIRVRegularizeLLVM::runOnModule(Module &Module) { M = &Module; Ctx = &M->getContext(); @@ -115,8 +168,11 @@ bool SPIRVRegularizeLLVM::regularize() { if (auto Call = dyn_cast(&II)) { Call->setTailCall(false); Function *CF = Call->getCalledFunction(); - if (CF && CF->isIntrinsic()) + if (CF && CF->isIntrinsic()) { removeFnAttr(Call, Attribute::NoUnwind); + if (auto *MSI = dyn_cast(Call)) + lowerMemset(MSI); + } } // Remove optimization info not supported by SPIRV diff --git a/llvm-spirv/test/transcoding/llvm.memset.ll b/llvm-spirv/test/transcoding/llvm.memset.ll index 9a72f2db2bb35..950928c845892 100644 --- a/llvm-spirv/test/transcoding/llvm.memset.ll +++ b/llvm-spirv/test/transcoding/llvm.memset.ll @@ -6,6 +6,7 @@ ; RUN: spirv-val %t.spv ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM +; CHECK-SPIRV: Decorate [[#NonConstMemset:]] LinkageAttributes "spirv.llvm_memset_p3i8_i32" ; CHECK-SPIRV: TypeInt [[Int8:[0-9]+]] 8 0 ; CHECK-SPIRV: Constant {{[0-9]+}} [[Lenmemset21:[0-9]+]] 4 ; CHECK-SPIRV: Constant {{[0-9]+}} [[Lenmemset0:[0-9]+]] 12 @@ -19,6 +20,7 @@ ; CHECK-SPIRV: Variable {{[0-9]+}} [[Val:[0-9]+]] 0 [[Init]] ; CHECK-SPIRV: 7 ConstantComposite [[Int8x4]] [[InitComp:[0-9]+]] [[Const21]] [[Const21]] [[Const21]] [[Const21]] ; CHECK-SPIRV: Variable {{[0-9]+}} [[ValComp:[0-9]+]] 0 [[InitComp]] +; CHECK-SPIRV: ConstantFalse [[#]] [[#False:]] ; CHECK-SPIRV: Bitcast [[Int8Ptr]] [[Target:[0-9]+]] {{[0-9]+}} ; CHECK-SPIRV: Bitcast [[Int8PtrConst]] [[Source:[0-9]+]] [[Val]] @@ -27,6 +29,31 @@ ; CHECK-SPIRV: Bitcast [[Int8PtrConst]] [[SourceComp:[0-9]+]] [[ValComp]] ; CHECK-SPIRV: CopyMemorySized {{[0-9]+}} [[SourceComp]] [[Lenmemset21]] 2 4 +; CHECK-SPIRV: FunctionCall [[#]] [[#]] [[#NonConstMemset]] [[#]] [[#]] [[#]] [[#False]] + +; CHECK-SPIRV: Function [[#]] [[#NonConstMemset]] +; CHECK-SPIRV: FunctionParameter [[#]] [[#Dest:]] +; CHECK-SPIRV: FunctionParameter [[#]] [[#Value:]] +; CHECK-SPIRV: FunctionParameter [[#]] [[#Len:]] +; CHECK-SPIRV: FunctionParameter [[#]] [[#Volatile:]] + +; CHECK-SPIRV: Label [[#Entry:]] +; CHECK-SPIRV: IEqual [[#]] [[#IsZeroLen:]] [[#Zero:]] [[#Len]] +; CHECK-SPIRV: BranchConditional [[#IsZeroLen]] [[#End:]] [[#WhileBody:]] + +; CHECK-SPIRV: Label [[#WhileBody]] +; CHECK-SPIRV: Phi [[#]] [[#Offset:]] [[#Zero]] [[#Entry]] [[#OffsetInc:]] [[#WhileBody]] +; CHECK-SPIRV: InBoundsPtrAccessChain [[#]] [[#Ptr:]] [[#Dest]] [[#Offset]] +; CHECK-SPIRV: Store [[#Ptr]] [[#Value]] 2 1 +; CHECK-SPIRV: IAdd [[#]] [[#OffsetInc]] [[#Offset]] [[#One:]] +; CHECK-SPIRV: ULessThan [[#]] [[#NotEnd:]] [[#OffsetInc]] [[#Len]] +; CHECK-SPIRV: BranchConditional [[#NotEnd]] [[#WhileBody]] [[#End]] + +; CHECK-SPIRV: Label [[#End]] +; CHECK-SPIRV: Return + +; CHECK-SPIRV: FunctionEnd + target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir" @@ -36,7 +63,7 @@ target triple = "spir" ; CHECK-LLVM: internal unnamed_addr addrspace(2) constant [4 x i8] c"\15\15\15\15" ; Function Attrs: nounwind -define spir_func void @_Z5foo11v(%struct.S1 addrspace(4)* noalias nocapture sret %agg.result) #0 { +define spir_func void @_Z5foo11v(%struct.S1 addrspace(4)* noalias nocapture sret %agg.result, i32 %s1, i64 %s2, i8 %v) #0 { %x = alloca [4 x i8] %x.bc = bitcast [4 x i8]* %x to i8* %1 = bitcast %struct.S1 addrspace(4)* %agg.result to i8 addrspace(4)* @@ -44,6 +71,26 @@ define spir_func void @_Z5foo11v(%struct.S1 addrspace(4)* noalias nocapture sret ; CHECK-LLVM: call void @llvm.memset.p4i8.i32(i8 addrspace(4)* align 4 %1, i8 0, i32 12, i1 false) tail call void @llvm.memset.p0i8.i32(i8* align 4 %x.bc, i8 21, i32 4, i1 false) ; CHECK-LLVM: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %x.bc, i8 addrspace(2)* align 4 %3, i32 4, i1 false) + + ; non-const value + tail call void @llvm.memset.p0i8.i32(i8* align 4 %x.bc, i8 %v, i32 3, i1 false) +; CHECK-LLVM: call void @llvm.memset.p0i8.i32(i8* %x.bc, i8 %v, i32 3, i1 false) + + ; non-const value and size + tail call void @llvm.memset.p0i8.i32(i8* align 4 %x.bc, i8 %v, i32 %s1, i1 false) +; CHECK-LLVM: call void @llvm.memset.p0i8.i32(i8* %x.bc, i8 %v, i32 %s1, i1 false) + + ; Address spaces, non-const value and size + %a = addrspacecast i8 addrspace(4)* %1 to i8 addrspace(3)* + tail call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %a, i8 %v, i32 %s1, i1 false) +; CHECK-LLVM: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %a, i8 %v, i32 %s1, i1 false) + %b = addrspacecast i8 addrspace(4)* %1 to i8 addrspace(1)* + tail call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %b, i8 %v, i64 %s2, i1 false) +; CHECK-LLVM: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %b, i8 %v, i64 %s2, i1 false) + + ; Volatile + tail call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %b, i8 %v, i64 %s2, i1 true) +; CHECK-LLVM: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %b, i8 %v, i64 %s2, i1 true) ret void } @@ -53,6 +100,12 @@ declare void @llvm.memset.p4i8.i32(i8 addrspace(4)* nocapture, i8, i32, i1) #1 ; Function Attrs: nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1 +; Function Attrs: nounwind +declare void @llvm.memset.p3i8.i32(i8 addrspace(3)*, i8, i32, i1) #1 + +; Function Attrs: nounwind +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)*, i8, i64, i1) #1 + attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } From b9df4ab8147dfdeec58d64a8f8d9efea9ca0477b Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Thu, 1 Oct 2020 11:08:35 +0300 Subject: [PATCH 533/544] Allow to translate llvm.fabs with half arguments The OpenCL SPIR-V Environment Specification seems to allow corresponding instruction. --- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 2 +- llvm-spirv/test/llvm.fabs.ll | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index 77f0f7da8b71a..71418018ea677 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -1542,7 +1542,7 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { NumElems = VecTy->getNumElements(); Ty = VecTy->getElementType(); } - if ((!Ty->isFloatTy() && !Ty->isDoubleTy()) || + if ((!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy()) || ((NumElems > 4) && (NumElems != 8) && (NumElems != 16))) { BM->getErrorLog().checkError(false, SPIRVEC_InvalidFunctionCall, II->getCalledOperand()->getName().str(), "", diff --git a/llvm-spirv/test/llvm.fabs.ll b/llvm-spirv/test/llvm.fabs.ll index c850da9652cc1..d818aab79275c 100644 --- a/llvm-spirv/test/llvm.fabs.ll +++ b/llvm-spirv/test/llvm.fabs.ll @@ -8,9 +8,21 @@ target triple = "spir64-unknown-unknown" ; CHECK: ExtInstImport [[extinst_id:[0-9]+]] "OpenCL.std" +; CHECK: 3 TypeFloat [[var0:[0-9]+]] 16 ; CHECK: 3 TypeFloat [[var1:[0-9]+]] 32 ; CHECK: 3 TypeFloat [[var2:[0-9]+]] 64 -; CHECK: 4 TypeVector [[var3:[0-9]+]] 2 4 +; CHECK: 4 TypeVector [[var3:[0-9]+]] [[var1]] 4 + +; CHECK: Function +; CHECK: 6 ExtInst [[var0]] {{[0-9]+}} [[extinst_id]] fabs +; CHECK: FunctionEnd + +; Function Attrs: nounwind readnone +define spir_func half @TestFabs16(half %x) local_unnamed_addr #0 { +entry: + %0 = tail call half @llvm.fabs.f16(half %x) + ret half %0 +} ; CHECK: Function ; CHECK: 6 ExtInst [[var1]] {{[0-9]+}} [[extinst_id]] fabs @@ -45,6 +57,9 @@ entry: ret <4 x float> %0 } +; Function Attrs: nounwind readnone +declare half @llvm.fabs.f16(half) #1 + ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 From ee7aded6070b58d4a76348bb73fcc48cda7314ec Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 5 Oct 2020 09:54:57 +0100 Subject: [PATCH 534/544] Update dbg-declare-arg test after LLVM change Propagate the test change from LLVM commit 55f9f87da2c ("Reapply Revert "RegAllocFast: Rewrite and improve"", 2020-09-21). --- llvm-spirv/test/DebugInfo/X86/dbg-declare-arg.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm-spirv/test/DebugInfo/X86/dbg-declare-arg.ll b/llvm-spirv/test/DebugInfo/X86/dbg-declare-arg.ll index 5d5253f20600d..43a172d7b06e0 100644 --- a/llvm-spirv/test/DebugInfo/X86/dbg-declare-arg.ll +++ b/llvm-spirv/test/DebugInfo/X86/dbg-declare-arg.ll @@ -24,7 +24,7 @@ target triple = "spir64-unknown-unknown" ; CHECK: DW_AT_name {{.*}}"j" ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] ( -; CHECK-NEXT: 0x{{.*}}, 0x{{.*}}: DW_OP_breg7 RSP+16, DW_OP_deref) +; CHECK-NEXT: 0x{{.*}}, 0x{{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref) ; CHECK-NEXT: DW_AT_name {{.*}}"my_a" %class.A = type { i32, i32, i32, i32 } From ff49e34489698b64946b3bbafd0edb0305bddfcf Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Wed, 23 Sep 2020 13:42:17 +0300 Subject: [PATCH 535/544] Fix translation of APInt constants Previously APInt constants were being stored into uint64_t value with following encoding/decoding. Now they are being packed into SPIRVWords array directly. Signed-off-by: Dmitry Sidorov --- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 19 +++++++++++++++- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 11 +++++++++- llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp | 22 +++++++++++++++++++ llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h | 5 +++++ llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h | 2 +- llvm-spirv/lib/SPIRV/libSPIRV/SPIRVValue.h | 13 +++++++++-- ...capability-arbitrary-precision-integers.ll | 16 +++++++------- 7 files changed, 75 insertions(+), 13 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 44da6ddb8a8a1..0a45c9301f79d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -1497,10 +1497,27 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, } switch (BT->getOpCode()) { case OpTypeBool: - case OpTypeInt: + case OpTypeInt: { + const unsigned NumBits = BT->getBitWidth(); + if (NumBits > 64) { + // Translate arbitrary precision integer constants + const unsigned RawDataNumWords = BConst->getNumWords(); + const unsigned BigValNumWords = (RawDataNumWords + 1) / 2; + std::vector BigValVec(BigValNumWords); + const SPIRVWord *RawData = BConst->getSPIRVWords(); + // SPIRV words are integers of 32-bit width, meanwhile llvm::APInt + // is storing data using an array of 64-bit words. Here we pack SPIRV + // words into 64-bit integer array. + for (size_t I = 0; I != RawDataNumWords; ++I) + BigValVec[I / 2] = + (I % 2) ? BigValVec[I / 2] | ((uint64_t)RawData[I] << 32) + : BigValVec[I / 2] | ((uint64_t)RawData[I]); + return mapValue(BV, ConstantInt::get(LT, APInt(NumBits, BigValVec))); + } return mapValue( BV, ConstantInt::get(LT, ConstValue, static_cast(BT)->isSigned())); + } case OpTypeFloat: { const llvm::fltSemantics *FS = nullptr; switch (BT->getFloatBitWidth()) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 4ce54caf707e6..6ca64f5cab2ce 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -684,8 +684,17 @@ SPIRVValue *LLVMToSPIRV::transConstant(Value *V) { return BM->addNullConstant(transType(AggType)); } - if (auto ConstI = dyn_cast(V)) + if (auto ConstI = dyn_cast(V)) { + unsigned BitWidth = ConstI->getType()->getBitWidth(); + if (BitWidth > 64) { + BM->getErrorLog().checkError( + BM->isAllowedToUseExtension( + ExtensionID::SPV_INTEL_arbitrary_precision_integers), + SPIRVEC_InvalidBitWidth, std::to_string(BitWidth)); + return BM->addConstant(transType(V->getType()), ConstI->getValue()); + } return BM->addConstant(transType(V->getType()), ConstI->getZExtValue()); + } if (auto ConstFP = dyn_cast(V)) { auto BT = static_cast(transType(V->getType())); diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp index cbae853e50b3c..64d1b1f2317bc 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp @@ -48,6 +48,8 @@ #include "SPIRVType.h" #include "SPIRVValue.h" +#include "llvm/ADT/APInt.h" + #include #include #include @@ -261,6 +263,7 @@ class SPIRVModuleImpl : public SPIRVModule { SPIRVFunction *F) override; SPIRVValue *addConstant(SPIRVValue *) override; SPIRVValue *addConstant(SPIRVType *, uint64_t) override; + SPIRVValue *addConstant(SPIRVType *, llvm::APInt) override; SPIRVValue *addSpecConstant(SPIRVType *, uint64_t) override; SPIRVValue *addDoubleConstant(SPIRVTypeFloat *, double) override; SPIRVValue *addFloatConstant(SPIRVTypeFloat *, float) override; @@ -1021,6 +1024,25 @@ SPIRVValue *SPIRVModuleImpl::addConstant(SPIRVType *Ty, uint64_t V) { return addConstant(new SPIRVConstant(this, Ty, getId(), V)); } +// Complete constructor for AP integer constant +template +SPIRVConstantBase::SPIRVConstantBase(SPIRVModule *M, SPIRVType *TheType, + SPIRVId TheId, llvm::APInt &TheValue) + : SPIRVValue(M, 0, OC, TheType, TheId) { + const uint64_t *BigValArr = TheValue.getRawData(); + for (size_t I = 0; I != TheValue.getNumWords(); ++I) { + Union.Words[I * 2 + 1] = + (uint32_t)((BigValArr[I] & 0xFFFFFFFF00000000LL) >> 32); + Union.Words[I * 2] = (uint32_t)(BigValArr[I] & 0xFFFFFFFFLL); + } + recalculateWordCount(); + validate(); +} + +SPIRVValue *SPIRVModuleImpl::addConstant(SPIRVType *Ty, llvm::APInt V) { + return addConstant(new SPIRVConstant(this, Ty, getId(), V)); +} + SPIRVValue *SPIRVModuleImpl::addIntegerConstant(SPIRVTypeInt *Ty, uint64_t V) { if (Ty->getBitWidth() == 32) { unsigned I32 = static_cast(V); diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h index 3e77ef8d40e7e..2696ac3a75e1a 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h @@ -48,6 +48,10 @@ #include #include +namespace llvm { +class APInt; +} // namespace llvm + namespace SPIRV { template class SPIRVConstantBase; @@ -252,6 +256,7 @@ class SPIRVModule { SPIRVFunction *F) = 0; virtual SPIRVValue *addConstant(SPIRVValue *) = 0; virtual SPIRVValue *addConstant(SPIRVType *, uint64_t) = 0; + virtual SPIRVValue *addConstant(SPIRVType *, llvm::APInt) = 0; virtual SPIRVValue *addSpecConstant(SPIRVType *, uint64_t) = 0; virtual SPIRVValue *addDoubleConstant(SPIRVTypeFloat *, double) = 0; virtual SPIRVValue *addFloatConstant(SPIRVTypeFloat *, float) = 0; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h index 0bc6c3da46d99..077d285a109e7 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h @@ -185,7 +185,7 @@ class SPIRVTypeInt : public SPIRVType { (BitWidth <= 64 || (Module->isAllowedToUseExtension( ExtensionID::SPV_INTEL_arbitrary_precision_integers) && - BitWidth <= 1024)) && + BitWidth <= 2048)) && "Invalid bit width"); } diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVValue.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVValue.h index d8bfff6e78c21..cf38fff4600fe 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVValue.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVValue.h @@ -47,6 +47,10 @@ #include "SPIRVEntry.h" #include "SPIRVType.h" +namespace llvm { +class APInt; +} // namespace llvm + #include namespace SPIRV { @@ -146,6 +150,9 @@ template class SPIRVConstantBase : public SPIRVValue { recalculateWordCount(); validate(); } + // Incomplete constructor for AP integer constant + SPIRVConstantBase(SPIRVModule *M, SPIRVType *TheType, SPIRVId TheId, + llvm::APInt &TheValue); // Complete constructor for float constant SPIRVConstantBase(SPIRVModule *M, SPIRVType *TheType, SPIRVId TheId, float TheValue) @@ -167,6 +174,8 @@ template class SPIRVConstantBase : public SPIRVValue { uint64_t getZExtIntValue() const { return Union.UInt64Val; } float getFloatValue() const { return Union.FloatVal; } double getDoubleValue() const { return Union.DoubleVal; } + unsigned getNumWords() const { return NumWords; } + SPIRVWord *getSPIRVWords() { return Union.Words; } protected: void recalculateWordCount() { @@ -175,7 +184,7 @@ template class SPIRVConstantBase : public SPIRVValue { } void validate() const override { SPIRVValue::validate(); - assert(NumWords >= 1 && NumWords <= 32 && "Invalid constant size"); + assert(NumWords >= 1 && NumWords <= 64 && "Invalid constant size"); } void encode(spv_ostream &O) const override { getEncoder(O) << Type << Id; @@ -197,7 +206,7 @@ template class SPIRVConstantBase : public SPIRVValue { uint64_t UInt64Val; float FloatVal; double DoubleVal; - SPIRVWord Words[32]; + SPIRVWord Words[64]; UnionType() { UInt64Val = 0; } } Union; }; diff --git a/llvm-spirv/test/capability-arbitrary-precision-integers.ll b/llvm-spirv/test/capability-arbitrary-precision-integers.ll index a747aaae5046c..c70a17179a15c 100644 --- a/llvm-spirv/test/capability-arbitrary-precision-integers.ll +++ b/llvm-spirv/test/capability-arbitrary-precision-integers.ll @@ -18,11 +18,11 @@ ; CHECK-SPIRV-DAG: TypeInt [[#I96:]] 96 0 ; CHECK-SPIRV-DAG: TypeInt [[#I128:]] 128 0 ; CHECK-SPIRV-DAG: TypeInt [[#I256:]] 256 0 -; CHECK-SPIRV-DAG: TypeInt [[#I1024:]] 1024 0 -; CHECK-SPIRV-DAG: Constant [[#I96]] [[#]] 1 0 0 +; CHECK-SPIRV-DAG: TypeInt [[#I2048:]] 2048 0 +; CHECK-SPIRV-DAG: Constant [[#I96]] [[#]] 4 0 1 ; CHECK-SPIRV-DAG: Constant [[#I128]] [[#]] 1 0 0 0 ; CHECK-SPIRV-DAG: Constant [[#I256]] [[#]] 1 0 0 0 0 0 0 0 -; CHECK-SPIRV-DAG: Constant [[#I1024]] [[#]] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +; CHECK-SPIRV-DAG: Constant [[#I2048]] [[#]] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-unknown" @@ -36,7 +36,7 @@ target triple = "spir64-unknown-unknown" @d = addrspace(1) global i96 0, align 8 @e = addrspace(1) global i128 0, align 8 @f = addrspace(1) global i256 0, align 8 -@g = addrspace(1) global i1024 0, align 8 +@g = addrspace(1) global i2048 0, align 8 ; Function Attrs: noinline nounwind optnone ; CHECK-LLVM: void @_Z4funci(i30 %a) @@ -50,13 +50,13 @@ entry: store i30 1, i30* %a.addr, align 4 ; CHECK-LLVM: store i48 -4294901761, i48 addrspace(1)* @c store i48 -4294901761, i48 addrspace(1)* @c, align 8 - store i96 1, i96 addrspace(1)* @d, align 8 -; CHECK-LLVM: store i96 1, i96 addrspace(1)* @d + store i96 18446744073709551620, i96 addrspace(1)* @d, align 8 +; CHECK-LLVM: store i96 18446744073709551620, i96 addrspace(1)* @d store i128 1, i128 addrspace(1)* @e, align 8 ; CHECK-LLVM: store i128 1, i128 addrspace(1)* @e store i256 1, i256 addrspace(1)* @f, align 8 ; CHECK-LLVM: store i256 1, i256 addrspace(1)* @f - store i1024 1, i1024 addrspace(1)* @g, align 8 -; CHECK-LLVM: store i1024 1, i1024 addrspace(1)* @g + store i2048 1, i2048 addrspace(1)* @g, align 8 +; CHECK-LLVM: store i2048 1, i2048 addrspace(1)* @g ret void } From 225d801a3c60a81ff0bced4584c9d57004c1ae56 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 2 Oct 2020 18:50:51 +0300 Subject: [PATCH 536/544] Rename OCL20ToSPIRV pass Renamed the pass to OCLToSPIRV: the problem with the old name is that it doesn't represent actual pass content: it handles not only OpenCL C 2.0 built-in functions, but also OpenCL C 1.2 built-in functions, acting like an adapter between OpenCL C representation and SPIR-V friendly IR representation. Also updated some comments to reflect that. --- llvm-spirv/include/LLVMSPIRVLib.h | 6 +- llvm-spirv/lib/SPIRV/CMakeLists.txt | 2 +- .../{OCL20ToSPIRV.cpp => OCLToSPIRV.cpp} | 111 +++++++++--------- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 2 +- .../test/transcoding/OpControlBarrier_cl21.ll | 2 +- 5 files changed, 63 insertions(+), 60 deletions(-) rename llvm-spirv/lib/SPIRV/{OCL20ToSPIRV.cpp => OCLToSPIRV.cpp} (95%) diff --git a/llvm-spirv/include/LLVMSPIRVLib.h b/llvm-spirv/include/LLVMSPIRVLib.h index 2fe5b3e8720c3..91373ea45e422 100644 --- a/llvm-spirv/include/LLVMSPIRVLib.h +++ b/llvm-spirv/include/LLVMSPIRVLib.h @@ -51,7 +51,7 @@ namespace llvm { // PassSupport.h. class PassRegistry; void initializeLLVMToSPIRVPass(PassRegistry &); -void initializeOCL20ToSPIRVPass(PassRegistry &); +void initializeOCLToSPIRVPass(PassRegistry &); void initializeOCL21ToSPIRVPass(PassRegistry &); void initializeOCLTypeToSPIRVPass(PassRegistry &); void initializeSPIRVLowerBoolPass(PassRegistry &); @@ -162,9 +162,9 @@ void mangleOpenClBuiltin(const std::string &UnmangledName, /// Create a pass for translating LLVM to SPIR-V. ModulePass *createLLVMToSPIRV(SPIRV::SPIRVModule *); -/// Create a pass for translating OCL 2.0 builtin functions to SPIR-V builtin +/// Create a pass for translating OCL C builtin functions to SPIR-V builtin /// functions. -ModulePass *createOCL20ToSPIRV(); +ModulePass *createOCLToSPIRV(); /// Create a pass for translating OCL 2.1 builtin functions to SPIR-V builtin /// functions. diff --git a/llvm-spirv/lib/SPIRV/CMakeLists.txt b/llvm-spirv/lib/SPIRV/CMakeLists.txt index cb23a533c4242..b97ac9ad3edfd 100644 --- a/llvm-spirv/lib/SPIRV/CMakeLists.txt +++ b/llvm-spirv/lib/SPIRV/CMakeLists.txt @@ -4,7 +4,7 @@ add_llvm_library(LLVMSPIRVLib Mangler/Mangler.cpp Mangler/ManglingUtils.cpp Mangler/ParameterType.cpp - OCL20ToSPIRV.cpp + OCLToSPIRV.cpp OCL21ToSPIRV.cpp OCLTypeToSPIRV.cpp OCLUtil.cpp diff --git a/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp similarity index 95% rename from llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp rename to llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp index 0a9d7a930231a..1dc0bcceb04a1 100644 --- a/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp @@ -1,4 +1,4 @@ -//===- OCL20ToSPIRV.cpp - Transform OCL20 to SPIR-V builtins ----*- C++ -*-===// +//===- OCLToSPIRV.cpp - Transform OCL to SPIR-V builtins --------*- C++ -*-===// // // The LLVM/SPIRV Translator // @@ -32,10 +32,11 @@ // //===----------------------------------------------------------------------===// // -// This file implements translation of OCL20 builtin functions. +// This file implements preprocessing of OpenCL C built-in functions into SPIR-V +// friendly IR form for further translation into SPIR-V // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "cl20tospv" +#define DEBUG_TYPE "ocl-to-spv" #include "OCLTypeToSPIRV.h" #include "OCLUtil.h" @@ -68,10 +69,10 @@ static size_t getOCLCpp11AtomicMaxNumOps(StringRef Name) { .Default(0); } -class OCL20ToSPIRV : public ModulePass, public InstVisitor { +class OCLToSPIRV : public ModulePass, public InstVisitor { public: - OCL20ToSPIRV() : ModulePass(ID), M(nullptr), Ctx(nullptr), CLVer(0) { - initializeOCL20ToSPIRVPass(*PassRegistry::getPassRegistry()); + OCLToSPIRV() : ModulePass(ID), M(nullptr), Ctx(nullptr), CLVer(0) { + initializeOCLToSPIRVPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; @@ -320,20 +321,22 @@ class OCL20ToSPIRV : public ModulePass, public InstVisitor { } }; -char OCL20ToSPIRV::ID = 0; +char OCLToSPIRV::ID = 0; -bool OCL20ToSPIRV::runOnModule(Module &Module) { +bool OCLToSPIRV::runOnModule(Module &Module) { M = &Module; Ctx = &M->getContext(); auto Src = getSPIRVSource(&Module); + // This is a pre-processing pass, which transform LLVM IR module to a more + // suitable form for the SPIR-V translation: it is specifically designed to + // handle OpenCL C built-in functions and shouldn't be launched for other + // source languages if (std::get<0>(Src) != spv::SourceLanguageOpenCL_C) return false; CLVer = std::get<1>(Src); - if (CLVer == kOCLVer::CL21) - return false; - LLVM_DEBUG(dbgs() << "Enter OCL20ToSPIRV:\n"); + LLVM_DEBUG(dbgs() << "Enter OCLToSPIRV:\n"); transWorkItemBuiltinsToVariables(); @@ -347,9 +350,9 @@ bool OCL20ToSPIRV::runOnModule(Module &Module) { GV->eraseFromParent(); eraseUselessFunctions(M); // remove unused functions declarations - LLVM_DEBUG(dbgs() << "After OCL20ToSPIRV:\n" << *M); + LLVM_DEBUG(dbgs() << "After OCLToSPIRV:\n" << *M); - verifyRegularizationPass(*M, "OCL20ToSPIRV"); + verifyRegularizationPass(*M, "OCLToSPIRV"); return true; } @@ -357,7 +360,7 @@ bool OCL20ToSPIRV::runOnModule(Module &Module) { // The order of handling OCL builtin functions is important. // Workgroup functions need to be handled before pipe functions since // there are functions fall into both categories. -void OCL20ToSPIRV::visitCallInst(CallInst &CI) { +void OCLToSPIRV::visitCallInst(CallInst &CI) { LLVM_DEBUG(dbgs() << "[visistCallInst] " << CI << '\n'); auto F = CI.getCalledFunction(); if (!F) @@ -541,7 +544,7 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { visitCallBuiltinSimple(&CI, MangledName, DemangledName); } -void OCL20ToSPIRV::visitCallNDRange(CallInst *CI, StringRef DemangledName) { +void OCLToSPIRV::visitCallNDRange(CallInst *CI, StringRef DemangledName) { assert(DemangledName.find(kOCLBuiltinName::NDRangePrefix) == 0); StringRef LenStr = DemangledName.substr(8, 1); auto Len = atoi(LenStr.data()); @@ -591,7 +594,7 @@ void OCL20ToSPIRV::visitCallNDRange(CallInst *CI, StringRef DemangledName) { &Attrs); } -void OCL20ToSPIRV::visitCallAsyncWorkGroupCopy(CallInst *CI, +void OCLToSPIRV::visitCallAsyncWorkGroupCopy(CallInst *CI, StringRef DemangledName) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); @@ -607,7 +610,7 @@ void OCL20ToSPIRV::visitCallAsyncWorkGroupCopy(CallInst *CI, &Attrs); } -CallInst *OCL20ToSPIRV::visitCallAtomicCmpXchg(CallInst *CI) { +CallInst *OCLToSPIRV::visitCallAtomicCmpXchg(CallInst *CI) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); Value *Expected = nullptr; CallInst *NewCI = nullptr; @@ -635,14 +638,14 @@ CallInst *OCL20ToSPIRV::visitCallAtomicCmpXchg(CallInst *CI) { return NewCI; } -void OCL20ToSPIRV::visitCallAtomicInit(CallInst *CI) { +void OCLToSPIRV::visitCallAtomicInit(CallInst *CI) { auto ST = new StoreInst(CI->getArgOperand(1), CI->getArgOperand(0), CI); ST->takeName(CI); CI->dropAllReferences(); CI->eraseFromParent(); } -void OCL20ToSPIRV::visitCallAllAny(spv::Op OC, CallInst *CI) { +void OCLToSPIRV::visitCallAllAny(spv::Op OC, CallInst *CI) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); @@ -677,18 +680,18 @@ void OCL20ToSPIRV::visitCallAllAny(spv::Op OC, CallInst *CI) { } } -void OCL20ToSPIRV::visitCallAtomicWorkItemFence(CallInst *CI) { +void OCLToSPIRV::visitCallAtomicWorkItemFence(CallInst *CI) { transMemoryBarrier(CI, getAtomicWorkItemFenceLiterals(CI)); } -void OCL20ToSPIRV::visitCallMemFence(CallInst *CI) { +void OCLToSPIRV::visitCallMemFence(CallInst *CI) { transMemoryBarrier( CI, std::make_tuple(cast(CI->getArgOperand(0))->getZExtValue(), OCLMO_relaxed, OCLMS_work_group)); } -void OCL20ToSPIRV::transMemoryBarrier(CallInst *CI, +void OCLToSPIRV::transMemoryBarrier(CallInst *CI, AtomicWorkItemFenceLiterals Lit) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); @@ -703,7 +706,7 @@ void OCL20ToSPIRV::transMemoryBarrier(CallInst *CI, &Attrs); } -void OCL20ToSPIRV::visitCallAtomicLegacy(CallInst *CI, StringRef MangledName, +void OCLToSPIRV::visitCallAtomicLegacy(CallInst *CI, StringRef MangledName, StringRef DemangledName) { StringRef Stem = DemangledName; if (Stem.startswith("atom_")) @@ -750,7 +753,7 @@ void OCL20ToSPIRV::visitCallAtomicLegacy(CallInst *CI, StringRef MangledName, transAtomicBuiltin(CI, Info); } -void OCL20ToSPIRV::visitCallAtomicCpp11(CallInst *CI, StringRef MangledName, +void OCLToSPIRV::visitCallAtomicCpp11(CallInst *CI, StringRef MangledName, StringRef DemangledName) { StringRef Stem = DemangledName; if (Stem.startswith("atomic_")) @@ -795,7 +798,7 @@ void OCL20ToSPIRV::visitCallAtomicCpp11(CallInst *CI, StringRef MangledName, transAtomicBuiltin(CI, Info); } -void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { +void OCLToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstSPIRV( M, CI, @@ -833,7 +836,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { &Attrs); } -void OCL20ToSPIRV::visitCallBarrier(CallInst *CI) { +void OCLToSPIRV::visitCallBarrier(CallInst *CI) { auto Lit = getBarrierLiterals(CI); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstSPIRV(M, CI, @@ -856,7 +859,7 @@ void OCL20ToSPIRV::visitCallBarrier(CallInst *CI) { &Attrs); } -void OCL20ToSPIRV::visitCallConvert(CallInst *CI, StringRef MangledName, +void OCLToSPIRV::visitCallConvert(CallInst *CI, StringRef MangledName, StringRef DemangledName) { if (eraseUselessConvert(CI, MangledName, DemangledName)) return; @@ -909,7 +912,7 @@ void OCL20ToSPIRV::visitCallConvert(CallInst *CI, StringRef MangledName, &Attrs); } -void OCL20ToSPIRV::visitCallGroupBuiltin(CallInst *CI, +void OCLToSPIRV::visitCallGroupBuiltin(CallInst *CI, StringRef OrigDemangledName) { auto F = CI->getCalledFunction(); std::vector PreOps; @@ -1017,7 +1020,7 @@ void OCL20ToSPIRV::visitCallGroupBuiltin(CallInst *CI, transBuiltin(CI, Info); } -void OCL20ToSPIRV::transBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { +void OCLToSPIRV::transBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); Op OC = OpNop; unsigned ExtOp = ~0U; @@ -1069,7 +1072,7 @@ void OCL20ToSPIRV::transBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { &Attrs); } -void OCL20ToSPIRV::visitCallReadImageMSAA(CallInst *CI, StringRef MangledName) { +void OCLToSPIRV::visitCallReadImageMSAA(CallInst *CI, StringRef MangledName) { assert(MangledName.find("msaa") != StringRef::npos); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstSPIRV( @@ -1083,7 +1086,7 @@ void OCL20ToSPIRV::visitCallReadImageMSAA(CallInst *CI, StringRef MangledName) { &Attrs); } -void OCL20ToSPIRV::visitCallReadImageWithSampler(CallInst *CI, +void OCLToSPIRV::visitCallReadImageWithSampler(CallInst *CI, StringRef MangledName) { assert(MangledName.find(kMangledName::Sampler) != StringRef::npos); assert(CI->getCalledFunction() && "Unexpected indirect call"); @@ -1138,7 +1141,7 @@ void OCL20ToSPIRV::visitCallReadImageWithSampler(CallInst *CI, &Attrs); } -void OCL20ToSPIRV::visitCallGetImageSize(CallInst *CI, +void OCLToSPIRV::visitCallGetImageSize(CallInst *CI, StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); StringRef TyName; @@ -1199,7 +1202,7 @@ void OCL20ToSPIRV::visitCallGetImageSize(CallInst *CI, } /// Remove trivial conversion functions -bool OCL20ToSPIRV::eraseUselessConvert(CallInst *CI, StringRef MangledName, +bool OCLToSPIRV::eraseUselessConvert(CallInst *CI, StringRef MangledName, StringRef DemangledName) { auto TargetTy = CI->getType(); auto SrcTy = CI->getArgOperand(0)->getType(); @@ -1223,7 +1226,7 @@ bool OCL20ToSPIRV::eraseUselessConvert(CallInst *CI, StringRef MangledName, return false; } -void OCL20ToSPIRV::visitCallBuiltinSimple(CallInst *CI, StringRef MangledName, +void OCLToSPIRV::visitCallBuiltinSimple(CallInst *CI, StringRef MangledName, StringRef DemangledName) { OCLBuiltinTransInfo Info; Info.MangledName = MangledName.str(); @@ -1234,7 +1237,7 @@ void OCL20ToSPIRV::visitCallBuiltinSimple(CallInst *CI, StringRef MangledName, /// Translates OCL work-item builtin functions to SPIRV builtin variables. /// Function like get_global_id(i) -> x = load GlobalInvocationId; extract x, i /// Function like get_work_dim() -> load WorkDim -void OCL20ToSPIRV::transWorkItemBuiltinsToVariables() { +void OCLToSPIRV::transWorkItemBuiltinsToVariables() { LLVM_DEBUG(dbgs() << "Enter transWorkItemBuiltinsToVariables\n"); std::vector WorkList; for (auto &I : *M) { @@ -1285,7 +1288,7 @@ void OCL20ToSPIRV::transWorkItemBuiltinsToVariables() { } } -void OCL20ToSPIRV::visitCallReadWriteImage(CallInst *CI, +void OCLToSPIRV::visitCallReadWriteImage(CallInst *CI, StringRef DemangledName) { OCLBuiltinTransInfo Info; if (DemangledName.find(kOCLBuiltinName::ReadImage) == 0) @@ -1307,7 +1310,7 @@ void OCL20ToSPIRV::visitCallReadWriteImage(CallInst *CI, transBuiltin(CI, Info); } -void OCL20ToSPIRV::visitCallToAddr(CallInst *CI, StringRef DemangledName) { +void OCLToSPIRV::visitCallToAddr(CallInst *CI, StringRef DemangledName) { auto AddrSpace = static_cast(CI->getType()->getPointerAddressSpace()); OCLBuiltinTransInfo Info; @@ -1325,7 +1328,7 @@ void OCL20ToSPIRV::visitCallToAddr(CallInst *CI, StringRef DemangledName) { transBuiltin(CI, Info); } -void OCL20ToSPIRV::visitCallRelational(CallInst *CI, StringRef DemangledName) { +void OCLToSPIRV::visitCallRelational(CallInst *CI, StringRef DemangledName) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); Op OC = OpNop; @@ -1367,7 +1370,7 @@ void OCL20ToSPIRV::visitCallRelational(CallInst *CI, StringRef DemangledName) { &Attrs); } -void OCL20ToSPIRV::visitCallVecLoadStore(CallInst *CI, StringRef MangledName, +void OCLToSPIRV::visitCallVecLoadStore(CallInst *CI, StringRef MangledName, StringRef OrigDemangledName) { std::vector PreOps; std::string DemangledName{OrigDemangledName}; @@ -1407,7 +1410,7 @@ void OCL20ToSPIRV::visitCallVecLoadStore(CallInst *CI, StringRef MangledName, transBuiltin(CI, Info); } -void OCL20ToSPIRV::visitCallGetFence(CallInst *CI, StringRef DemangledName) { +void OCLToSPIRV::visitCallGetFence(CallInst *CI, StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); Op OC = OpNop; OCLSPIRVBuiltinMap::find(DemangledName.str(), &OC); @@ -1423,14 +1426,14 @@ void OCL20ToSPIRV::visitCallGetFence(CallInst *CI, StringRef DemangledName) { &Attrs); } -void OCL20ToSPIRV::visitCallDot(CallInst *CI) { +void OCLToSPIRV::visitCallDot(CallInst *CI) { IRBuilder<> Builder(CI); Value *FMulVal = Builder.CreateFMul(CI->getOperand(0), CI->getOperand(1)); CI->replaceAllUsesWith(FMulVal); CI->eraseFromParent(); } -void OCL20ToSPIRV::visitCallScalToVec(CallInst *CI, StringRef MangledName, +void OCLToSPIRV::visitCallScalToVec(CallInst *CI, StringRef MangledName, StringRef DemangledName) { // Check if all arguments have the same type - it's simple case. auto Uniform = true; @@ -1495,7 +1498,7 @@ void OCL20ToSPIRV::visitCallScalToVec(CallInst *CI, StringRef MangledName, &Attrs); } -void OCL20ToSPIRV::visitCallGetImageChannel(CallInst *CI, +void OCLToSPIRV::visitCallGetImageChannel(CallInst *CI, StringRef DemangledName, unsigned int Offset) { assert(CI->getCalledFunction() && "Unexpected indirect call"); @@ -1513,7 +1516,7 @@ void OCL20ToSPIRV::visitCallGetImageChannel(CallInst *CI, }, &Attrs); } -void OCL20ToSPIRV::visitCallEnqueueKernel(CallInst *CI, +void OCLToSPIRV::visitCallEnqueueKernel(CallInst *CI, StringRef DemangledName) { const DataLayout &DL = M->getDataLayout(); bool HasEvents = DemangledName.find("events") != StringRef::npos; @@ -1580,7 +1583,7 @@ void OCL20ToSPIRV::visitCallEnqueueKernel(CallInst *CI, CI->eraseFromParent(); } -void OCL20ToSPIRV::visitCallKernelQuery(CallInst *CI, StringRef DemangledName) { +void OCLToSPIRV::visitCallKernelQuery(CallInst *CI, StringRef DemangledName) { const DataLayout &DL = M->getDataLayout(); bool HasNDRange = DemangledName.find("_for_ndrange_impl") != StringRef::npos; // BIs with "_for_ndrange_impl" suffix has NDRange argument first, and @@ -1643,7 +1646,7 @@ static void processSubgroupBlockReadWriteINTEL(CallInst *CI, // buffers and images, but need to be mapped to distinct SPIR-V instructions. // Additionally, for block reads, need to distinguish between scalar block // reads and vector block reads. -void OCL20ToSPIRV::visitSubgroupBlockReadINTEL(CallInst *CI) { +void OCLToSPIRV::visitSubgroupBlockReadINTEL(CallInst *CI) { OCLBuiltinTransInfo Info; if (isOCLImageType(CI->getArgOperand(0)->getType())) Info.UniqName = getSPIRVFuncName(spv::OpSubgroupImageBlockReadINTEL); @@ -1656,7 +1659,7 @@ void OCL20ToSPIRV::visitSubgroupBlockReadINTEL(CallInst *CI) { // The intel_sub_group_block_write built-ins are similarly overloaded to support // both buffers and images but need to be mapped to distinct SPIR-V // instructions. -void OCL20ToSPIRV::visitSubgroupBlockWriteINTEL(CallInst *CI) { +void OCLToSPIRV::visitSubgroupBlockWriteINTEL(CallInst *CI) { OCLBuiltinTransInfo Info; if (isOCLImageType(CI->getArgOperand(0)->getType())) Info.UniqName = getSPIRVFuncName(spv::OpSubgroupImageBlockWriteINTEL); @@ -1669,7 +1672,7 @@ void OCL20ToSPIRV::visitSubgroupBlockWriteINTEL(CallInst *CI) { processSubgroupBlockReadWriteINTEL(CI, Info, DataTy, M); } -void OCL20ToSPIRV::visitSubgroupImageMediaBlockINTEL(CallInst *CI, +void OCLToSPIRV::visitSubgroupImageMediaBlockINTEL(CallInst *CI, StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); spv::Op OpCode = DemangledName.rfind("read") != StringRef::npos @@ -1719,7 +1722,7 @@ static Op getSubgroupAVCIntelMCEOpCodeForWrapper(StringRef DemangledName) { } // Handles Subgroup AVC Intel extension generic built-ins. -void OCL20ToSPIRV::visitSubgroupAVCBuiltinCall(CallInst *CI, +void OCLToSPIRV::visitSubgroupAVCBuiltinCall(CallInst *CI, StringRef DemangledName) { Op OC = OpNop; std::string FName{DemangledName}; @@ -1759,7 +1762,7 @@ void OCL20ToSPIRV::visitSubgroupAVCBuiltinCall(CallInst *CI, // 'IME', 'REF' and 'SIC' sets contain wrapper built-ins which don't have // corresponded instructions in SPIRV and should be translated to a // conterpart from 'MCE' with conversion for an argument and result (if needed). -void OCL20ToSPIRV::visitSubgroupAVCWrapperBuiltinCall(CallInst *CI, +void OCLToSPIRV::visitSubgroupAVCWrapperBuiltinCall(CallInst *CI, Op WrappedOC, StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); @@ -1825,7 +1828,7 @@ void OCL20ToSPIRV::visitSubgroupAVCWrapperBuiltinCall(CallInst *CI, // Handles Subgroup AVC Intel extension built-ins which take sampler as // an argument (their SPIR-V counterparts take OpTypeVmeImageIntel instead) -void OCL20ToSPIRV::visitSubgroupAVCBuiltinCallWithSampler( +void OCLToSPIRV::visitSubgroupAVCBuiltinCallWithSampler( CallInst *CI, StringRef DemangledName) { std::string FName{DemangledName}; std::string Prefix = kOCLSubgroupsAVCIntel::Prefix; @@ -1875,10 +1878,10 @@ void OCL20ToSPIRV::visitSubgroupAVCBuiltinCallWithSampler( } // namespace SPIRV -INITIALIZE_PASS_BEGIN(OCL20ToSPIRV, "cl20tospv", "Transform OCL 2.0 to SPIR-V", +INITIALIZE_PASS_BEGIN(OCLToSPIRV, "ocl-to-spv", "Transform OCL 2.0 to SPIR-V", false, false) INITIALIZE_PASS_DEPENDENCY(OCLTypeToSPIRV) -INITIALIZE_PASS_END(OCL20ToSPIRV, "cl20tospv", "Transform OCL 2.0 to SPIR-V", +INITIALIZE_PASS_END(OCLToSPIRV, "ocl-to-spv", "Transform OCL 2.0 to SPIR-V", false, false) -ModulePass *llvm::createOCL20ToSPIRV() { return new OCL20ToSPIRV(); } +ModulePass *llvm::createOCLToSPIRV() { return new OCLToSPIRV(); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 6ca64f5cab2ce..8d4ffa818b0a1 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -3604,7 +3604,7 @@ void addPassesForSPIRV(legacy::PassManager &PassMgr, PassMgr.add(createSPIRVLowerSPIRBlocks()); PassMgr.add(createOCLTypeToSPIRV()); PassMgr.add(createSPIRVLowerOCLBlocks()); - PassMgr.add(createOCL20ToSPIRV()); + PassMgr.add(createOCLToSPIRV()); PassMgr.add(createSPIRVRegularizeLLVM()); PassMgr.add(createSPIRVLowerConstExpr()); PassMgr.add(createSPIRVLowerBool()); diff --git a/llvm-spirv/test/transcoding/OpControlBarrier_cl21.ll b/llvm-spirv/test/transcoding/OpControlBarrier_cl21.ll index 1d7941a8fe911..be55f847a0dda 100644 --- a/llvm-spirv/test/transcoding/OpControlBarrier_cl21.ll +++ b/llvm-spirv/test/transcoding/OpControlBarrier_cl21.ll @@ -113,4 +113,4 @@ attributes #2 = { nounwind } !0 = !{} !1 = !{i32 1, i32 2} -!2 = !{i32 2, i32 1} +!2 = !{i32 2, i32 0} From edb267a857a87bc2ee6f3ebd6acebb01037c0454 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 2 Oct 2020 18:56:25 +0300 Subject: [PATCH 537/544] Remove OCL21ToSPIRV pass Since KhronosGroup/SPIRV-LLVM-Translator@6ceea975375b44fe0238fb30f6dffc50a9c33fb5 this pass only handles input files coming out OpenCL C++ 2.1, which technically doesn't exists and therefore this should be a dead code. Additionally: most of the pass functionality (except sub-group barrier handling) is untested and duplicated in OCLToSPIRV pass. --- llvm-spirv/include/LLVMSPIRVLib.h | 5 - llvm-spirv/lib/SPIRV/CMakeLists.txt | 1 - llvm-spirv/lib/SPIRV/OCL21ToSPIRV.cpp | 248 -------------------------- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 1 - 4 files changed, 255 deletions(-) delete mode 100644 llvm-spirv/lib/SPIRV/OCL21ToSPIRV.cpp diff --git a/llvm-spirv/include/LLVMSPIRVLib.h b/llvm-spirv/include/LLVMSPIRVLib.h index 91373ea45e422..7afa71051f9bd 100644 --- a/llvm-spirv/include/LLVMSPIRVLib.h +++ b/llvm-spirv/include/LLVMSPIRVLib.h @@ -52,7 +52,6 @@ namespace llvm { class PassRegistry; void initializeLLVMToSPIRVPass(PassRegistry &); void initializeOCLToSPIRVPass(PassRegistry &); -void initializeOCL21ToSPIRVPass(PassRegistry &); void initializeOCLTypeToSPIRVPass(PassRegistry &); void initializeSPIRVLowerBoolPass(PassRegistry &); void initializeSPIRVLowerConstExprPass(PassRegistry &); @@ -166,10 +165,6 @@ ModulePass *createLLVMToSPIRV(SPIRV::SPIRVModule *); /// functions. ModulePass *createOCLToSPIRV(); -/// Create a pass for translating OCL 2.1 builtin functions to SPIR-V builtin -/// functions. -ModulePass *createOCL21ToSPIRV(); - /// Create a pass for adapting OCL types for SPIRV. ModulePass *createOCLTypeToSPIRV(); diff --git a/llvm-spirv/lib/SPIRV/CMakeLists.txt b/llvm-spirv/lib/SPIRV/CMakeLists.txt index b97ac9ad3edfd..1d0b7f3e69dbe 100644 --- a/llvm-spirv/lib/SPIRV/CMakeLists.txt +++ b/llvm-spirv/lib/SPIRV/CMakeLists.txt @@ -5,7 +5,6 @@ add_llvm_library(LLVMSPIRVLib Mangler/ManglingUtils.cpp Mangler/ParameterType.cpp OCLToSPIRV.cpp - OCL21ToSPIRV.cpp OCLTypeToSPIRV.cpp OCLUtil.cpp VectorComputeUtil.cpp diff --git a/llvm-spirv/lib/SPIRV/OCL21ToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCL21ToSPIRV.cpp deleted file mode 100644 index ec6c951bc6b62..0000000000000 --- a/llvm-spirv/lib/SPIRV/OCL21ToSPIRV.cpp +++ /dev/null @@ -1,248 +0,0 @@ -//===- OCL21ToSPIRV.cpp - Transform OCL21 to SPIR-V builtins ----*- C++ -*-===// -// -// The LLVM/SPIRV Translator -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimers in the documentation -// and/or other materials provided with the distribution. -// Neither the names of Advanced Micro Devices, Inc., nor the names of its -// contributors may be used to endorse or promote products derived from this -// Software without specific prior written permission. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH -// THE SOFTWARE. -// -//===----------------------------------------------------------------------===// -// -// This file implements translation of OCL21 builtin functions. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "cl21tospv" - -#include "OCLUtil.h" -#include "SPIRVInternal.h" -#include "libSPIRV/SPIRVDebug.h" - -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" - -#include - -using namespace llvm; -using namespace SPIRV; -using namespace OCLUtil; - -namespace SPIRV { -class OCL21ToSPIRV : public ModulePass, public InstVisitor { -public: - OCL21ToSPIRV() : ModulePass(ID), M(nullptr), Ctx(nullptr), CLVer(0) { - initializeOCL21ToSPIRVPass(*PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override; - virtual void visitCallInst(CallInst &CI); - - /// Transform SPIR-V convert function - // __spirv{N}Op{ConvertOpName}(src, dummy) - /// => - /// __spirv_{ConvertOpName}_R{TargeTyName} - void visitCallConvert(CallInst *CI, Op OC); - - /// Transform SPIR-V decoration - /// x = __spirv_{OpName}; - /// y = __spirv{N}Op{Decorate}(x, type, value, dummy) - /// => - /// y = __spirv_{OpName}{Postfix(type,value)} - void visitCallDecorate(CallInst *CI); - - /// Transform sub_group_barrier to __spirv_ControlBarrier. - /// sub_group_barrier(scope, flag) => - /// __spirv_ControlBarrier(subgroup, map(scope), map(flag)) - void visitCallSubGroupBarrier(CallInst *CI); - - /// Transform OCL C++ builtin function to SPIR-V builtin function. - /// Assuming there is no argument changes. - /// Should be called at last. - void transBuiltin(CallInst *CI, Op OC); - - static char ID; - -private: - ConstantInt *addInt32(int I) { return getInt32(M, I); } - - Module *M; - LLVMContext *Ctx; - unsigned CLVer; /// OpenCL version as major*10+minor - std::set ValuesToDelete; -}; - -char OCL21ToSPIRV::ID = 0; - -bool OCL21ToSPIRV::runOnModule(Module &Module) { - M = &Module; - Ctx = &M->getContext(); - - auto Src = getSPIRVSource(&Module); - if (std::get<0>(Src) != spv::SourceLanguageOpenCL_CPP) - return false; - - CLVer = std::get<1>(Src); - if (CLVer != kOCLVer::CL21) - return false; - - LLVM_DEBUG(dbgs() << "Enter OCL21ToSPIRV:\n"); - visit(*M); - - for (auto &I : ValuesToDelete) - if (auto Inst = dyn_cast(I)) - Inst->eraseFromParent(); - for (auto &I : ValuesToDelete) - if (auto GV = dyn_cast(I)) - GV->eraseFromParent(); - - LLVM_DEBUG(dbgs() << "After OCL21ToSPIRV:\n" << *M); - verifyRegularizationPass(*M, "OCL21ToSPIRV"); - - return true; -} - -// The order of handling OCL builtin functions is important. -// Workgroup functions need to be handled before pipe functions since -// there are functions fall into both categories. -void OCL21ToSPIRV::visitCallInst(CallInst &CI) { - LLVM_DEBUG(dbgs() << "[visistCallInst] " << CI << '\n'); - auto F = CI.getCalledFunction(); - if (!F) - return; - - auto MangledName = F->getName(); - StringRef DemangledName; - - if (oclIsBuiltin(MangledName, DemangledName)) { - if (DemangledName == kOCLBuiltinName::SubGroupBarrier) { - visitCallSubGroupBarrier(&CI); - return; - } - } - - if (!oclIsBuiltin(MangledName, DemangledName, true)) - return; - LLVM_DEBUG(dbgs() << "DemangledName:" << DemangledName << '\n'); - StringRef Ref(DemangledName); - - Op OC = OpNop; - if (!OpCodeNameMap::rfind(Ref.str(), &OC)) - return; - LLVM_DEBUG(dbgs() << "maps to opcode " << OC << '\n'); - - if (isCvtOpCode(OC)) { - visitCallConvert(&CI, OC); - return; - } - if (OC == OpDecorate) { - visitCallDecorate(&CI); - return; - } - transBuiltin(&CI, OC); -} - -void OCL21ToSPIRV::visitCallConvert(CallInst *CI, Op OC) { - assert(CI->getCalledFunction() && "Unexpected indirect call"); - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV( - M, CI, - [=](CallInst *, std::vector &Args) { - Args.pop_back(); - return getSPIRVFuncName( - OC, kSPIRVPostfix::Divider + - getPostfixForReturnType(CI, OC == OpSConvert || - OC == OpConvertFToS || - OC == OpSatConvertUToS)); - }, - &Attrs); - ValuesToDelete.insert(CI); - ValuesToDelete.insert(CI->getCalledFunction()); -} - -void OCL21ToSPIRV::visitCallDecorate(CallInst *CI) { - auto Target = cast(CI->getArgOperand(0)); - assert(Target->getCalledFunction() && "Unexpected indirect call"); - Function *F = Target->getCalledFunction(); - auto Name = F->getName(); - StringRef DemangledName; - oclIsBuiltin(Name, DemangledName); - - BuiltinFuncMangleInfo Info; - F->setName(mangleBuiltin( - std::string(DemangledName) + kSPIRVPostfix::Divider + - getPostfix(getArgAsDecoration(CI, 1), getArgAsInt(CI, 2)), - getTypes(getArguments(CI)), &Info)); - CI->replaceAllUsesWith(Target); - ValuesToDelete.insert(CI); - ValuesToDelete.insert(CI->getCalledFunction()); -} - -void OCL21ToSPIRV::visitCallSubGroupBarrier(CallInst *CI) { - LLVM_DEBUG(dbgs() << "[visitCallSubGroupBarrier] " << *CI << '\n'); - auto Lit = getBarrierLiterals(CI); - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - Args.resize(3); - // Execution scope - Args[0] = addInt32(map(std::get<2>(Lit))); - // Memory scope - Args[1] = addInt32(map(std::get<1>(Lit))); - // Use sequential consistent memory order by default. - // But if the flags argument is set to 0, we use - // None(Relaxed) memory order. - unsigned MemFenceFlag = std::get<0>(Lit); - OCLMemOrderKind MemOrder = - MemFenceFlag ? OCLMO_seq_cst : OCLMO_relaxed; - Args[2] = addInt32(mapOCLMemSemanticToSPIRV( - MemFenceFlag, MemOrder)); // Memory semantics - return getSPIRVFuncName(OpControlBarrier); - }, - &Attrs); -} - -void OCL21ToSPIRV::transBuiltin(CallInst *CI, Op OC) { - assert(CI->getCalledFunction() && "Unexpected indirect call"); - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - assert(OC != OpExtInst && "not supported"); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - return getSPIRVFuncName(OC); - }, - &Attrs); - ValuesToDelete.insert(CI); - ValuesToDelete.insert(CI->getCalledFunction()); -} - -} // namespace SPIRV - -INITIALIZE_PASS(OCL21ToSPIRV, "cl21tospv", "Transform OCL 2.1 to SPIR-V", false, - false) - -ModulePass *llvm::createOCL21ToSPIRV() { return new OCL21ToSPIRV(); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 8d4ffa818b0a1..e31580dc06a6d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -3600,7 +3600,6 @@ void addPassesForSPIRV(legacy::PassManager &PassMgr, if (Opts.isSPIRVMemToRegEnabled()) PassMgr.add(createPromoteMemoryToRegisterPass()); PassMgr.add(createPreprocessMetadata()); - PassMgr.add(createOCL21ToSPIRV()); PassMgr.add(createSPIRVLowerSPIRBlocks()); PassMgr.add(createOCLTypeToSPIRV()); PassMgr.add(createSPIRVLowerOCLBlocks()); From a3295793f53da5963b55811e4385f2fb280221f7 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 5 Oct 2020 18:29:09 +0300 Subject: [PATCH 538/544] Apply clang-format to OCLToSPIRV.cpp --- llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp | 226 ++++++++++++++-------------- 1 file changed, 112 insertions(+), 114 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp index 1dc0bcceb04a1..287c0e4b5c05c 100644 --- a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp @@ -595,19 +595,19 @@ void OCLToSPIRV::visitCallNDRange(CallInst *CI, StringRef DemangledName) { } void OCLToSPIRV::visitCallAsyncWorkGroupCopy(CallInst *CI, - StringRef DemangledName) { + StringRef DemangledName) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - if (DemangledName == - OCLUtil::kOCLBuiltinName::AsyncWorkGroupCopy) { - Args.insert(Args.begin() + 3, addSizet(1)); - } - Args.insert(Args.begin(), addInt32(ScopeWorkgroup)); - return getSPIRVFuncName(OpGroupAsyncCopy); - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args) { + if (DemangledName == OCLUtil::kOCLBuiltinName::AsyncWorkGroupCopy) { + Args.insert(Args.begin() + 3, addSizet(1)); + } + Args.insert(Args.begin(), addInt32(ScopeWorkgroup)); + return getSPIRVFuncName(OpGroupAsyncCopy); + }, + &Attrs); } CallInst *OCLToSPIRV::visitCallAtomicCmpXchg(CallInst *CI) { @@ -692,22 +692,23 @@ void OCLToSPIRV::visitCallMemFence(CallInst *CI) { } void OCLToSPIRV::transMemoryBarrier(CallInst *CI, - AtomicWorkItemFenceLiterals Lit) { + AtomicWorkItemFenceLiterals Lit) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - Args.resize(2); - Args[0] = addInt32(map(std::get<2>(Lit))); - Args[1] = addInt32(mapOCLMemSemanticToSPIRV( - std::get<0>(Lit), std::get<1>(Lit))); - return getSPIRVFuncName(OpMemoryBarrier); - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args) { + Args.resize(2); + Args[0] = addInt32(map(std::get<2>(Lit))); + Args[1] = addInt32( + mapOCLMemSemanticToSPIRV(std::get<0>(Lit), std::get<1>(Lit))); + return getSPIRVFuncName(OpMemoryBarrier); + }, + &Attrs); } void OCLToSPIRV::visitCallAtomicLegacy(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { + StringRef DemangledName) { StringRef Stem = DemangledName; if (Stem.startswith("atom_")) Stem = Stem.drop_front(strlen("atom_")); @@ -754,7 +755,7 @@ void OCLToSPIRV::visitCallAtomicLegacy(CallInst *CI, StringRef MangledName, } void OCLToSPIRV::visitCallAtomicCpp11(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { + StringRef DemangledName) { StringRef Stem = DemangledName; if (Stem.startswith("atomic_")) Stem = Stem.drop_front(strlen("atomic_")); @@ -839,28 +840,28 @@ void OCLToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { void OCLToSPIRV::visitCallBarrier(CallInst *CI) { auto Lit = getBarrierLiterals(CI); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - Args.resize(3); - // Execution scope - Args[0] = addInt32(map(std::get<2>(Lit))); - // Memory scope - Args[1] = addInt32(map(std::get<1>(Lit))); - // Use sequential consistent memory order by default. - // But if the flags argument is set to 0, we use - // None(Relaxed) memory order. - unsigned MemFenceFlag = std::get<0>(Lit); - OCLMemOrderKind MemOrder = - MemFenceFlag ? OCLMO_seq_cst : OCLMO_relaxed; - Args[2] = addInt32(mapOCLMemSemanticToSPIRV( - MemFenceFlag, MemOrder)); // Memory semantics - return getSPIRVFuncName(OpControlBarrier); - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args) { + Args.resize(3); + // Execution scope + Args[0] = addInt32(map(std::get<2>(Lit))); + // Memory scope + Args[1] = addInt32(map(std::get<1>(Lit))); + // Use sequential consistent memory order by default. + // But if the flags argument is set to 0, we use + // None(Relaxed) memory order. + unsigned MemFenceFlag = std::get<0>(Lit); + OCLMemOrderKind MemOrder = MemFenceFlag ? OCLMO_seq_cst : OCLMO_relaxed; + Args[2] = addInt32(mapOCLMemSemanticToSPIRV( + MemFenceFlag, MemOrder)); // Memory semantics + return getSPIRVFuncName(OpControlBarrier); + }, + &Attrs); } void OCLToSPIRV::visitCallConvert(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { + StringRef DemangledName) { if (eraseUselessConvert(CI, MangledName, DemangledName)) return; Op OC = OpNop; @@ -904,16 +905,16 @@ void OCLToSPIRV::visitCallConvert(CallInst *CI, StringRef MangledName, } assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - return getSPIRVFuncName(OC, - TargetTyName + Sat + Rounding); - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args) { + return getSPIRVFuncName(OC, TargetTyName + Sat + Rounding); + }, + &Attrs); } void OCLToSPIRV::visitCallGroupBuiltin(CallInst *CI, - StringRef OrigDemangledName) { + StringRef OrigDemangledName) { auto F = CI->getCalledFunction(); std::vector PreOps; std::string DemangledName{OrigDemangledName}; @@ -1047,12 +1048,13 @@ void OCLToSPIRV::transBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { else return; if (!Info.RetTy) - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args) { - Info.PostProc(Args); - return Info.UniqName + Info.Postfix; - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args) { + Info.PostProc(Args); + return Info.UniqName + Info.Postfix; + }, + &Attrs); else mutateCallInstSPIRV( M, CI, @@ -1087,7 +1089,7 @@ void OCLToSPIRV::visitCallReadImageMSAA(CallInst *CI, StringRef MangledName) { } void OCLToSPIRV::visitCallReadImageWithSampler(CallInst *CI, - StringRef MangledName) { + StringRef MangledName) { assert(MangledName.find(kMangledName::Sampler) != StringRef::npos); assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); @@ -1141,8 +1143,7 @@ void OCLToSPIRV::visitCallReadImageWithSampler(CallInst *CI, &Attrs); } -void OCLToSPIRV::visitCallGetImageSize(CallInst *CI, - StringRef DemangledName) { +void OCLToSPIRV::visitCallGetImageSize(CallInst *CI, StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); StringRef TyName; SmallVector SubStrs; @@ -1203,7 +1204,7 @@ void OCLToSPIRV::visitCallGetImageSize(CallInst *CI, /// Remove trivial conversion functions bool OCLToSPIRV::eraseUselessConvert(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { + StringRef DemangledName) { auto TargetTy = CI->getType(); auto SrcTy = CI->getArgOperand(0)->getType(); if (auto *VecTy = dyn_cast(TargetTy)) @@ -1227,7 +1228,7 @@ bool OCLToSPIRV::eraseUselessConvert(CallInst *CI, StringRef MangledName, } void OCLToSPIRV::visitCallBuiltinSimple(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { + StringRef DemangledName) { OCLBuiltinTransInfo Info; Info.MangledName = MangledName.str(); Info.UniqName = DemangledName.str(); @@ -1289,7 +1290,7 @@ void OCLToSPIRV::transWorkItemBuiltinsToVariables() { } void OCLToSPIRV::visitCallReadWriteImage(CallInst *CI, - StringRef DemangledName) { + StringRef DemangledName) { OCLBuiltinTransInfo Info; if (DemangledName.find(kOCLBuiltinName::ReadImage) == 0) Info.UniqName = kOCLBuiltinName::ReadImage; @@ -1371,7 +1372,7 @@ void OCLToSPIRV::visitCallRelational(CallInst *CI, StringRef DemangledName) { } void OCLToSPIRV::visitCallVecLoadStore(CallInst *CI, StringRef MangledName, - StringRef OrigDemangledName) { + StringRef OrigDemangledName) { std::vector PreOps; std::string DemangledName{OrigDemangledName}; if (DemangledName.find(kOCLBuiltinName::VLoadPrefix) == 0 && @@ -1415,15 +1416,15 @@ void OCLToSPIRV::visitCallGetFence(CallInst *CI, StringRef DemangledName) { Op OC = OpNop; OCLSPIRVBuiltinMap::find(DemangledName.str(), &OC); std::string SPIRVName = getSPIRVFuncName(OC); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args, Type *&Ret) { - return SPIRVName; - }, - [=](CallInst *NewCI) -> Instruction * { - return BinaryOperator::CreateLShr(NewCI, getInt32(M, 8), - "", CI); - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args, Type *&Ret) { + return SPIRVName; + }, + [=](CallInst *NewCI) -> Instruction * { + return BinaryOperator::CreateLShr(NewCI, getInt32(M, 8), "", CI); + }, + &Attrs); } void OCLToSPIRV::visitCallDot(CallInst *CI) { @@ -1434,7 +1435,7 @@ void OCLToSPIRV::visitCallDot(CallInst *CI) { } void OCLToSPIRV::visitCallScalToVec(CallInst *CI, StringRef MangledName, - StringRef DemangledName) { + StringRef DemangledName) { // Check if all arguments have the same type - it's simple case. auto Uniform = true; auto IsArg0Vector = isa(CI->getOperand(0)->getType()); @@ -1498,26 +1499,24 @@ void OCLToSPIRV::visitCallScalToVec(CallInst *CI, StringRef MangledName, &Attrs); } -void OCLToSPIRV::visitCallGetImageChannel(CallInst *CI, - StringRef DemangledName, - unsigned int Offset) { +void OCLToSPIRV::visitCallGetImageChannel(CallInst *CI, StringRef DemangledName, + unsigned int Offset) { assert(CI->getCalledFunction() && "Unexpected indirect call"); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); Op OC = OpNop; OCLSPIRVBuiltinMap::find(DemangledName.str(), &OC); std::string SPIRVName = getSPIRVFuncName(OC); - mutateCallInstSPIRV(M, CI, - [=](CallInst *, std::vector &Args, Type *&Ret) { - return SPIRVName; - }, - [=](CallInst *NewCI) -> Instruction * { - return BinaryOperator::CreateAdd( - NewCI, getInt32(M, Offset), "", CI); - }, - &Attrs); + mutateCallInstSPIRV( + M, CI, + [=](CallInst *, std::vector &Args, Type *&Ret) { + return SPIRVName; + }, + [=](CallInst *NewCI) -> Instruction * { + return BinaryOperator::CreateAdd(NewCI, getInt32(M, Offset), "", CI); + }, + &Attrs); } -void OCLToSPIRV::visitCallEnqueueKernel(CallInst *CI, - StringRef DemangledName) { +void OCLToSPIRV::visitCallEnqueueKernel(CallInst *CI, StringRef DemangledName) { const DataLayout &DL = M->getDataLayout(); bool HasEvents = DemangledName.find("events") != StringRef::npos; @@ -1594,29 +1593,29 @@ void OCLToSPIRV::visitCallKernelQuery(CallInst *CI, StringRef DemangledName) { auto *BlockF = cast(getUnderlyingObject(BlockFVal)); AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInst(M, CI, - [=](CallInst *CI, std::vector &Args) { - Value *Param = *Args.rbegin(); - Type *ParamType = getUnderlyingObject(Param)->getType(); - if (PointerType *PT = dyn_cast(ParamType)) { - ParamType = PT->getElementType(); - } - // Last arg corresponds to SPIRV Param operand. - // Insert Invoke in front of Param. - // Add Param Size and Param Align at the end. - Args[BlockFIdx] = BlockF; - Args.push_back(getInt32(M, DL.getTypeStoreSize(ParamType))); - Args.push_back( - getInt32(M, DL.getPrefTypeAlignment(ParamType))); - - Op Opcode = OCLSPIRVBuiltinMap::map(DemangledName.str()); - // Adding "__" postfix, so in case we have multiple such - // functions and their names will have numerical postfix, - // then the numerical postfix will be droped and we will get - // correct function name. - return getSPIRVFuncName(Opcode, kSPIRVName::Postfix); - }, - /*BuiltinFuncMangleInfo*/ nullptr, &Attrs); + mutateCallInst( + M, CI, + [=](CallInst *CI, std::vector &Args) { + Value *Param = *Args.rbegin(); + Type *ParamType = getUnderlyingObject(Param)->getType(); + if (PointerType *PT = dyn_cast(ParamType)) { + ParamType = PT->getElementType(); + } + // Last arg corresponds to SPIRV Param operand. + // Insert Invoke in front of Param. + // Add Param Size and Param Align at the end. + Args[BlockFIdx] = BlockF; + Args.push_back(getInt32(M, DL.getTypeStoreSize(ParamType))); + Args.push_back(getInt32(M, DL.getPrefTypeAlignment(ParamType))); + + Op Opcode = OCLSPIRVBuiltinMap::map(DemangledName.str()); + // Adding "__" postfix, so in case we have multiple such + // functions and their names will have numerical postfix, + // then the numerical postfix will be droped and we will get + // correct function name. + return getSPIRVFuncName(Opcode, kSPIRVName::Postfix); + }, + /*BuiltinFuncMangleInfo*/ nullptr, &Attrs); } // Add postfix to overloaded intel subgroup block read/write builtins @@ -1673,7 +1672,7 @@ void OCLToSPIRV::visitSubgroupBlockWriteINTEL(CallInst *CI) { } void OCLToSPIRV::visitSubgroupImageMediaBlockINTEL(CallInst *CI, - StringRef DemangledName) { + StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); spv::Op OpCode = DemangledName.rfind("read") != StringRef::npos ? spv::OpSubgroupImageMediaBlockReadINTEL @@ -1723,7 +1722,7 @@ static Op getSubgroupAVCIntelMCEOpCodeForWrapper(StringRef DemangledName) { // Handles Subgroup AVC Intel extension generic built-ins. void OCLToSPIRV::visitSubgroupAVCBuiltinCall(CallInst *CI, - StringRef DemangledName) { + StringRef DemangledName) { Op OC = OpNop; std::string FName{DemangledName}; std::string Prefix = kOCLSubgroupsAVCIntel::Prefix; @@ -1762,9 +1761,8 @@ void OCLToSPIRV::visitSubgroupAVCBuiltinCall(CallInst *CI, // 'IME', 'REF' and 'SIC' sets contain wrapper built-ins which don't have // corresponded instructions in SPIRV and should be translated to a // conterpart from 'MCE' with conversion for an argument and result (if needed). -void OCLToSPIRV::visitSubgroupAVCWrapperBuiltinCall(CallInst *CI, - Op WrappedOC, - StringRef DemangledName) { +void OCLToSPIRV::visitSubgroupAVCWrapperBuiltinCall(CallInst *CI, Op WrappedOC, + StringRef DemangledName) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); std::string Prefix = kOCLSubgroupsAVCIntel::Prefix; From 32999cb5b6e68eff35f4157b2056a3d6a9cf4317 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 22 Sep 2020 17:46:37 +0100 Subject: [PATCH 539/544] Conditionalize translation of llvm.global_ctors/dtors Do not translate the llvm.global_ctors and llvm.global_dtors variables to SPIR-V when the function pointers extension is not enabled, because we currently cannot represent the variable without the function pointers extension. --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index e31580dc06a6d..38a0c90518e38 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -2702,7 +2702,14 @@ bool LLVMToSPIRV::transGlobalVariables() { for (auto I = M->global_begin(), E = M->global_end(); I != E; ++I) { if ((*I).getName() == "llvm.global.annotations") transGlobalAnnotation(&(*I)); - else if (MDNode *IO = ((*I).getMetadata("io_pipe_id"))) + else if ((I->getName() == "llvm.global_ctors" || + I->getName() == "llvm.global_dtors") && + !BM->isAllowedToUseExtension( + ExtensionID::SPV_INTEL_function_pointers)) { + // Function pointers are required to represent structor lists; do not + // translate the variable if function pointers are not available. + continue; + } else if (MDNode *IO = ((*I).getMetadata("io_pipe_id"))) transGlobalIOPipeStorage(&(*I), IO); else if (!transValue(&(*I), nullptr)) return false; From e8e4495c08a61203b0157c32b3d488ccfb8364d7 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 22 Sep 2020 17:46:37 +0100 Subject: [PATCH 540/544] Set Initializer ExecutionMode for global ctors SPIR-V 1.1 adds the Initializer Execution Mode for entry points. Set this execution mode for entry points listed in the llvm.global_ctors variable. --- llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp | 24 +++++++++++++++++++++ llvm-spirv/test/CXX/global-ctor.cl | 21 ++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 llvm-spirv/test/CXX/global-ctor.cl diff --git a/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp b/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp index 20009b18a99b4..160d7bd166cd4 100644 --- a/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp +++ b/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp @@ -68,6 +68,8 @@ class PreprocessMetadata : public ModulePass { bool runOnModule(Module &M) override; void visit(Module *M); + void preprocessCXXStructorList(SPIRVMDBuilder::NamedMDWrapper &EM, + GlobalVariable *V, ExecutionMode EMode); void preprocessOCLMetadata(Module *M, SPIRVMDBuilder *B, SPIRVMDWalker *W); void preprocessVectorComputeMetadata(Module *M, SPIRVMDBuilder *B, SPIRVMDWalker *W); @@ -94,6 +96,24 @@ bool PreprocessMetadata::runOnModule(Module &Module) { return true; } +void PreprocessMetadata::preprocessCXXStructorList( + SPIRVMDBuilder::NamedMDWrapper &EM, GlobalVariable *V, + ExecutionMode EMode) { + auto *List = dyn_cast_or_null(V->getInitializer()); + if (!List) + return; + + for (Value *V : List->operands()) { + auto *Structor = cast(V); + + // Each entry in the list is a struct containing 3 members: + // (priority, function, data), with function being the entry point. + auto *Kernel = cast(Structor->getOperand(1)); + + EM.addOp().add(Kernel).add(EMode).done(); + } +} + void PreprocessMetadata::visit(Module *M) { SPIRVMDBuilder B(*M); SPIRVMDWalker W(*M); @@ -105,6 +125,10 @@ void PreprocessMetadata::visit(Module *M) { // of OpExecutionMode instructions auto EM = B.addNamedMD(kSPIRVMD::ExecutionMode); // !spirv.ExecutionMode = {} + // Process special variables in LLVM IR module. + if (auto *GV = M->getGlobalVariable("llvm.global_ctors")) + preprocessCXXStructorList(EM, GV, spv::ExecutionModeInitializer); + // Add execution modes for kernels. We take it from metadata attached to // the kernel functions. for (Function &Kernel : *M) { diff --git a/llvm-spirv/test/CXX/global-ctor.cl b/llvm-spirv/test/CXX/global-ctor.cl new file mode 100644 index 0000000000000..433b4860f0666 --- /dev/null +++ b/llvm-spirv/test/CXX/global-ctor.cl @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -cl-std=clc++ -emit-llvm-bc -triple spir -O0 %s -o %t.bc +// RUN: llvm-spirv %t.bc -o %t.spv +// RUN: spirv-val %t.spv +// RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +class Something { + public: + Something(int a) : v(a) {} + int v; +}; + +Something g(33); + +void kernel work(global int *out) { + *out = g.v; +} + +// CHECK-SPIRV: EntryPoint 6 [[work:[0-9]+]] "work" +// CHECK-SPIRV-NOT: ExecutionMode [[work]] 33 +// CHECK-SPIRV: EntryPoint 6 [[ctor:[0-9]+]] "_GLOBAL__sub_I_global_ctor.cl" +// CHECK-SPIRV: ExecutionMode [[ctor]] 33 From b035640e6d92ae5482471eea8db92b1af53babdd Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 23 Sep 2020 18:05:22 +0100 Subject: [PATCH 541/544] Translate Initializer to llvm.global_ctors Reconstruct a llvm.global_ctors variable for entry points with the Initializer Execution Mode. --- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 45 ++++++++++++++++++++++++++++ llvm-spirv/lib/SPIRV/SPIRVReader.h | 2 ++ llvm-spirv/test/CXX/global-ctor.cl | 4 +++ 3 files changed, 51 insertions(+) diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 0a45c9301f79d..6cdaf3fb994a4 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -3691,6 +3691,45 @@ void SPIRVToLLVM::transGlobalCtorDtors(SPIRVVariable *BV) { cast(V)->setLinkage(GlobalValue::AppendingLinkage); } +void SPIRVToLLVM::createCXXStructor(const char *ListName, + SmallVectorImpl &Funcs) { + if (Funcs.empty()) + return; + + // If the SPIR-V input contained a variable for the structor list and it + // has already been translated, then don't interfere. + if (M->getGlobalVariable(ListName)) + return; + + // Type of a structor entry: { i32, void ()*, i8* } + Type *PriorityTy = Type::getInt32Ty(*Context); + PointerType *CtorTy = PointerType::getUnqual( + FunctionType::get(Type::getVoidTy(*Context), false)); + PointerType *ComdatTy = Type::getInt8PtrTy(*Context); + StructType *StructorTy = StructType::get(PriorityTy, CtorTy, ComdatTy); + + ArrayType *ArrTy = ArrayType::get(StructorTy, Funcs.size()); + + GlobalVariable *GV = + cast(M->getOrInsertGlobal(ListName, ArrTy)); + GV->setLinkage(GlobalValue::AppendingLinkage); + + // Build the initializer. + SmallVector ArrayElts; + for (auto *F : Funcs) { + SmallVector Elts; + // SPIR-V does not specify an order between Initializers, so set default + // priority. + Elts.push_back(ConstantInt::get(PriorityTy, 65535)); + Elts.push_back(ConstantExpr::getBitCast(F, CtorTy)); + Elts.push_back(ConstantPointerNull::get(ComdatTy)); + ArrayElts.push_back(ConstantStruct::get(StructorTy, Elts)); + } + + Constant *NewArray = ConstantArray::get(ArrTy, ArrayElts); + GV->setInitializer(NewArray); +} + bool SPIRVToLLVM::transFPContractMetadata() { bool ContractOff = false; for (unsigned I = 0, E = BM->getNumFunctions(); I != E; ++I) { @@ -3767,6 +3806,7 @@ static bool transKernelArgTypeMedataFromString(LLVMContext *Ctx, } bool SPIRVToLLVM::transMetadata() { + SmallVector CtorKernels; for (unsigned I = 0, E = BM->getNumFunctions(); I != E; ++I) { SPIRVFunction *BF = BM->getFunction(I); Function *F = static_cast(getTranslatedValue(BF)); @@ -3798,6 +3838,10 @@ bool SPIRVToLLVM::transMetadata() { ConstantInt::get(Type::getInt32Ty(*Context), 1))); F->setMetadata(kSPIR2MD::VecTyHint, MDNode::get(*Context, MetadataVec)); } + // Generate metadata for Initializer. + if (BF->getExecutionMode(ExecutionModeInitializer)) { + CtorKernels.push_back(F); + } // Generate metadata for intel_reqd_sub_group_size if (auto *EM = BF->getExecutionMode(ExecutionModeSubgroupSize)) { auto SizeMD = ConstantAsMetadata::get(getUInt32(M, EM->getLiterals()[0])); @@ -3828,6 +3872,7 @@ bool SPIRVToLLVM::transMetadata() { MemoryModelMD->addOperand( getMDTwoInt(Context, static_cast(BM->getAddressingModel()), static_cast(BM->getMemoryModel()))); + createCXXStructor("llvm.global_ctors", CtorKernels); return true; } diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.h b/llvm-spirv/lib/SPIRV/SPIRVReader.h index f57467008aa1b..d11b74bba3140 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.h +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.h @@ -271,6 +271,8 @@ class SPIRVToLLVM { void transUserSemantic(SPIRV::SPIRVFunction *Fun); void transGlobalAnnotations(); void transGlobalCtorDtors(SPIRVVariable *BV); + void createCXXStructor(const char *ListName, + SmallVectorImpl &Funcs); void transIntelFPGADecorations(SPIRVValue *BV, Value *V); }; // class SPIRVToLLVM diff --git a/llvm-spirv/test/CXX/global-ctor.cl b/llvm-spirv/test/CXX/global-ctor.cl index 433b4860f0666..a5626d44f640d 100644 --- a/llvm-spirv/test/CXX/global-ctor.cl +++ b/llvm-spirv/test/CXX/global-ctor.cl @@ -2,6 +2,7 @@ // RUN: llvm-spirv %t.bc -o %t.spv // RUN: spirv-val %t.spv // RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV +// RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM class Something { public: @@ -19,3 +20,6 @@ void kernel work(global int *out) { // CHECK-SPIRV-NOT: ExecutionMode [[work]] 33 // CHECK-SPIRV: EntryPoint 6 [[ctor:[0-9]+]] "_GLOBAL__sub_I_global_ctor.cl" // CHECK-SPIRV: ExecutionMode [[ctor]] 33 + +// CHECK-LLVM: llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @[[CTORNAME:_GLOBAL__sub_I[^ ]+]], i8* null } +// CHECK-LLVM: define spir_kernel void @[[CTORNAME]] From 2036e3c235d146278805c56b17456f01858c3773 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 28 Sep 2020 12:30:35 +0100 Subject: [PATCH 542/544] Only emit Initializer and Finalizer from SPIR-V 1.1 The Initializer and Finalizer Execution Modes were missing before SPIR-V 1.1. --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 13 +++++++++++-- llvm-spirv/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h | 1 + llvm-spirv/test/CXX/global-ctor.cl | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 38a0c90518e38..629dfd51d0107 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -3028,11 +3028,20 @@ bool LLVMToSPIRV::transExecutionMode() { switch (EMode) { case spv::ExecutionModeContractionOff: - case spv::ExecutionModeInitializer: - case spv::ExecutionModeFinalizer: BF->addExecutionMode(BM->add( new SPIRVExecutionMode(BF, static_cast(EMode)))); break; + case spv::ExecutionModeInitializer: + case spv::ExecutionModeFinalizer: + if (BM->isAllowedToUseVersion(VersionNumber::SPIRV_1_1)) { + BF->addExecutionMode(BM->add( + new SPIRVExecutionMode(BF, static_cast(EMode)))); + } else { + getErrorLog().checkError(false, SPIRVEC_Requires1_1, + "Initializer/Finalizer Execution Mode"); + return false; + } + break; case spv::ExecutionModeLocalSize: case spv::ExecutionModeLocalSizeHint: { unsigned X, Y, Z; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h index eb469408cedc6..a25bc002f2dd6 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h @@ -16,3 +16,4 @@ _SPIRV_OP(FunctionPointers, "Can't translate function pointer:\n") _SPIRV_OP(InvalidInstruction, "Can't translate llvm instruction:\n") _SPIRV_OP(InvalidWordCount, "Can't encode instruction with word count greater than 65535:\n") +_SPIRV_OP(Requires1_1, "Feature requires SPIR-V 1.1 or greater:") diff --git a/llvm-spirv/test/CXX/global-ctor.cl b/llvm-spirv/test/CXX/global-ctor.cl index a5626d44f640d..c83bad5baa4bf 100644 --- a/llvm-spirv/test/CXX/global-ctor.cl +++ b/llvm-spirv/test/CXX/global-ctor.cl @@ -3,6 +3,7 @@ // RUN: spirv-val %t.spv // RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV // RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM +// RUN: not llvm-spirv %t.bc --spirv-max-version=1.0 2>&1 | FileCheck %s --check-prefix=CHECK-SPV10 class Something { public: @@ -23,3 +24,5 @@ void kernel work(global int *out) { // CHECK-LLVM: llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @[[CTORNAME:_GLOBAL__sub_I[^ ]+]], i8* null } // CHECK-LLVM: define spir_kernel void @[[CTORNAME]] + +// CHECK-SPV10: Feature requires SPIR-V 1.1 or greater: Initializer/Finalizer Execution Mode From 36d3cc9fca09f58644ad5674ff796ae160206bb4 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Tue, 6 Oct 2020 13:34:05 +0300 Subject: [PATCH 543/544] Fix emission of SPIR-V friendly IR for OpenCL EIS (#746) Added possibility to emit SPIR-V friendly IR instead of OpenCL-C-like mangled function names for instructions from OpenCL extended instruction set. --- llvm-spirv/lib/SPIRV/SPIRVInternal.h | 5 ++ llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 16 ++++- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 65 +++++++++++++++++ llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h | 4 ++ llvm-spirv/test/OpenCL.std/acos.spvasm | 79 +++++++++++++++++++++ llvm-spirv/test/OpenCL.std/upsample.spvasm | 76 ++++++++++++++++++++ llvm-spirv/test/mangling_upsample.spt | 33 --------- 7 files changed, 243 insertions(+), 35 deletions(-) create mode 100644 llvm-spirv/test/OpenCL.std/acos.spvasm create mode 100644 llvm-spirv/test/OpenCL.std/upsample.spvasm delete mode 100644 llvm-spirv/test/mangling_upsample.spt diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h index edfddf2064bb2..5fb74ed23912f 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h +++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h @@ -935,6 +935,11 @@ bool containsUnsignedAtomicType(StringRef Name); std::string mangleBuiltin(StringRef UniqName, ArrayRef ArgTypes, BuiltinFuncMangleInfo *BtnInfo); +/// Mangle a function from OpenCL extended instruction set in SPIR-V friendly IR +/// manner +std::string getSPIRVFriendlyIRFunctionName(OCLExtOpKind ExtOpId, + ArrayRef ArgTys); + /// Remove cast from a value. Value *removeCast(Value *V); diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 6cdaf3fb994a4..dbc412b8cd100 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -4195,12 +4195,24 @@ Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, std::vector ArgTypes = transTypeVector(BC->getValueTypes(BArgs)); + // TODO: we should always produce SPIR-V friendly IR and apply lowering + // later if needed if (IsPrintf) { - MangledName = "printf"; ArgTypes.resize(1); + } + + if (BM->getDesiredBIsRepresentation() != BIsRepresentation::SPIRVFriendlyIR) { + // Convert extended instruction into an OpenCL built-in + if (IsPrintf) { + MangledName = "printf"; + } else { + mangleOpenClBuiltin(UnmangledName, ArgTypes, MangledName); + } } else { - mangleOpenClBuiltin(UnmangledName, ArgTypes, MangledName); + MangledName = getSPIRVFriendlyIRFunctionName( + static_cast(EntryPoint), ArgTypes); } + SPIRVDBG(spvdbgs() << "[transOCLBuiltinFromExtInst] ModifiedUnmangledName: " << UnmangledName << " MangledName: " << MangledName << '\n'); diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index 71418018ea677..a836ff9bc9c47 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -1556,5 +1556,70 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { } return true; } +} // namespace SPIRV + +namespace { +class OpenCLStdToSPIRVFriendlyIRMangleInfo : public BuiltinFuncMangleInfo { +public: + OpenCLStdToSPIRVFriendlyIRMangleInfo(OCLExtOpKind ExtOpId, + ArrayRef ArgTys) + : ExtOpId(ExtOpId), ArgTys(ArgTys) { + UnmangledName = getSPIRVExtFuncName(SPIRVEIS_OpenCL, ExtOpId); + } + + void init(StringRef) override { + switch (ExtOpId) { + case OpenCLLIB::UAbs: + LLVM_FALLTHROUGH; + case OpenCLLIB::UAbs_diff: + LLVM_FALLTHROUGH; + case OpenCLLIB::UAdd_sat: + LLVM_FALLTHROUGH; + case OpenCLLIB::UHadd: + LLVM_FALLTHROUGH; + case OpenCLLIB::URhadd: + LLVM_FALLTHROUGH; + case OpenCLLIB::UClamp: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMad_hi: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMad_sat: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMax: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMin: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMul_hi: + LLVM_FALLTHROUGH; + case OpenCLLIB::USub_sat: + LLVM_FALLTHROUGH; + case OpenCLLIB::U_Upsample: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMad24: + LLVM_FALLTHROUGH; + case OpenCLLIB::UMul24: + // Treat all arguments as unsigned + addUnsignedArg(-1); + break; + case OpenCLLIB::S_Upsample: + addUnsignedArg(1); + break; + default:; + // No special handling is needed + } + } + +private: + OCLExtOpKind ExtOpId; + ArrayRef ArgTys; +}; +} // namespace + +namespace SPIRV { +std::string getSPIRVFriendlyIRFunctionName(OCLExtOpKind ExtOpId, + ArrayRef ArgTys) { + OpenCLStdToSPIRVFriendlyIRMangleInfo MangleInfo(ExtOpId, ArgTys); + return mangleBuiltin(MangleInfo.getUnmangledName(), ArgTys, &MangleInfo); +} } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h index 2696ac3a75e1a..1903c7992ea12 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h @@ -498,6 +498,10 @@ class SPIRVModule { } } + BIsRepresentation getDesiredBIsRepresentation() const { + return TranslationOpts.getDesiredBIsRepresentation(); + } + // I/O functions friend spv_ostream &operator<<(spv_ostream &O, SPIRVModule &M); friend std::istream &operator>>(std::istream &I, SPIRVModule &M); diff --git a/llvm-spirv/test/OpenCL.std/acos.spvasm b/llvm-spirv/test/OpenCL.std/acos.spvasm new file mode 100644 index 0000000000000..d1c24eef9212a --- /dev/null +++ b/llvm-spirv/test/OpenCL.std/acos.spvasm @@ -0,0 +1,79 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv %t.spv -r --spirv-target-env=SPV-IR -o - | llvm-dis | FileCheck %s --check-prefixes=CHECK,CHECK-SPV-IR +; RUN: llvm-spirv %t.spv -r --spirv-target-env=CL2.0 -o - | llvm-dis | FileCheck %s --check-prefixes=CHECK,CHECK-CL20 +; +; CHECK-LABEL: spir_kernel void @test +; CHECK-SPV-IR: call spir_func float @_Z16__spirv_ocl_acosf(float +; CHECK-SPV-IR: call spir_func double @_Z16__spirv_ocl_acosd(double +; CHECK-CL20: call spir_func float @_Z4acosf(float +; CHECK-CL20: call spir_func double @_Z4acosd(double +; CHECK-LABEL: spir_kernel void @test2 +; CHECK-SPV-IR: call spir_func <4 x float> @_Z16__spirv_ocl_acosDv4_f(<4 x float> +; CHECK-SPV-IR: call spir_func <4 x double> @_Z16__spirv_ocl_acosDv4_d(<4 x double> +; CHECK-CL20: call spir_func <4 x float> @_Z4acosDv4_f(<4 x float> +; CHECK-CL20: call spir_func <4 x double> @_Z4acosDv4_d(<4 x double> + + OpCapability Addresses + OpCapability Kernel + OpCapability Float64 + %1 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %8 "test" + OpEntryPoint Kernel %21 "test2" + %29 = OpString "kernel_arg_type.test.float,double,float*,double*," + %30 = OpString "kernel_arg_type.test2.float4,double4,float4*,double4*," + OpSource OpenCL_C 200000 + OpName %arg1 "arg1" + OpName %arg2 "arg2" + OpName %output1 "output1" + OpName %output2 "output2" + OpName %entry "entry" + OpName %call "call" + OpName %call1 "call1" + OpName %arg1_0 "arg1" + OpName %arg2_0 "arg2" + OpName %output1_0 "output1" + OpName %output2_0 "output2" + OpName %entry_0 "entry" + OpName %call_0 "call" + OpName %call1_0 "call1" + OpDecorate %output1 FuncParamAttr NoCapture + OpDecorate %output2 FuncParamAttr NoCapture + OpDecorate %output1_0 FuncParamAttr NoCapture + OpDecorate %output2_0 FuncParamAttr NoCapture + %void = OpTypeVoid + %float = OpTypeFloat 32 + %double = OpTypeFloat 64 +%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float +%_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double + %7 = OpTypeFunction %void %float %double %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_double + %v4float = OpTypeVector %float 4 + %v4double = OpTypeVector %double 4 +%_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float +%_ptr_CrossWorkgroup_v4double = OpTypePointer CrossWorkgroup %v4double + %20 = OpTypeFunction %void %v4float %v4double %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4double + %8 = OpFunction %void None %7 + %arg1 = OpFunctionParameter %float + %arg2 = OpFunctionParameter %double + %output1 = OpFunctionParameter %_ptr_CrossWorkgroup_float + %output2 = OpFunctionParameter %_ptr_CrossWorkgroup_double + %entry = OpLabel + %call = OpExtInst %float %1 acos %arg1 + OpStore %output1 %call Aligned 4 + %call1 = OpExtInst %double %1 acos %arg2 + OpStore %output2 %call1 Aligned 8 + OpReturn + OpFunctionEnd + %21 = OpFunction %void None %20 + %arg1_0 = OpFunctionParameter %v4float + %arg2_0 = OpFunctionParameter %v4double + %output1_0 = OpFunctionParameter %_ptr_CrossWorkgroup_v4float + %output2_0 = OpFunctionParameter %_ptr_CrossWorkgroup_v4double + %entry_0 = OpLabel + %call_0 = OpExtInst %v4float %1 acos %arg1_0 + OpStore %output1_0 %call_0 Aligned 16 + %call1_0 = OpExtInst %v4double %1 acos %arg2_0 + OpStore %output2_0 %call1_0 Aligned 32 + OpReturn + OpFunctionEnd diff --git a/llvm-spirv/test/OpenCL.std/upsample.spvasm b/llvm-spirv/test/OpenCL.std/upsample.spvasm new file mode 100644 index 0000000000000..c50079f1b8162 --- /dev/null +++ b/llvm-spirv/test/OpenCL.std/upsample.spvasm @@ -0,0 +1,76 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv %t.spv -r --spirv-target-env=SPV-IR -o - | llvm-dis | FileCheck %s --check-prefixes=CHECK,CHECK-SPV-IR +; RUN: llvm-spirv %t.spv -r --spirv-target-env=CL2.0 -o - | llvm-dis | FileCheck %s --check-prefixes=CHECK,CHECK-CL20 +; +; CHECK-LABEL: spir_kernel void @test +; CHECK-SPV-IR: call spir_func i64 @_Z22__spirv_ocl_s_upsampleij(i32 %{{[0-9a-z]+}}, i32 +; CHECK-SPV-IR: call spir_func i64 @_Z22__spirv_ocl_u_upsamplejj(i32 %{{[0-9a-z]+}}, i32 +; CHECK-CL20: call spir_func i64 @_Z8upsampleij(i32 %{{[0-9a-z]+}}, i32 +; CHECK-CL20: call spir_func i64 @_Z8upsamplejj(i32 %{{[0-9a-z]+}}, i32 +; CHECK-LABEL: spir_kernel void @test2 +; CHECK-SPV-IR: call spir_func <4 x i64> @_Z22__spirv_ocl_s_upsampleDv4_iDv4_j(<4 x i32> %{{[0-9a-z]+}}, <4 x i32> +; CHECK-SPV-IR: call spir_func <4 x i64> @_Z22__spirv_ocl_u_upsampleDv4_jS_(<4 x i32> %{{[0-9a-z]+}}, <4 x i32> +; CHECK-CL20: call spir_func <4 x i64> @_Z8upsampleDv4_iDv4_j(<4 x i32> %{{[0-9a-z]+}}, <4 x i32> +; CHECK-CL20: call spir_func <4 x i64> @_Z8upsampleDv4_jS_(<4 x i32> %{{[0-9a-z]+}}, <4 x i32> + + OpCapability Addresses + OpCapability Kernel + OpCapability Int64 + %1 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %7 "test" + OpEntryPoint Kernel %20 "test2" + %28 = OpString "kernel_arg_type.test.int,uint,long*," + %29 = OpString "kernel_arg_type.test2.int4,uint4,ulong4*," + OpSource OpenCL_C 200000 + OpName %a "a" + OpName %b "b" + OpName %c "c" + OpName %entry "entry" + OpName %call "call" + OpName %call1 "call1" + OpName %arrayidx2 "arrayidx2" + OpName %call2 "call2" + OpName %b_0 "b" + OpName %c_0 "c" + OpName %entry_0 "entry" + OpName %call1_0 "call1" + OpName %call3 "call3" + OpName %arrayidx4 "arrayidx4" + OpDecorate %c FuncParamAttr NoCapture + OpDecorate %c_0 FuncParamAttr NoCapture + %uint = OpTypeInt 32 0 + %ulong = OpTypeInt 64 0 + %uint_1 = OpConstant %uint 1 + %void = OpTypeVoid +%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong + %6 = OpTypeFunction %void %uint %uint %_ptr_CrossWorkgroup_ulong + %v4uint = OpTypeVector %uint 4 + %v4ulong = OpTypeVector %ulong 4 +%_ptr_CrossWorkgroup_v4ulong = OpTypePointer CrossWorkgroup %v4ulong + %19 = OpTypeFunction %void %v4uint %v4uint %_ptr_CrossWorkgroup_v4ulong + %7 = OpFunction %void None %6 + %a = OpFunctionParameter %uint + %b = OpFunctionParameter %uint + %c = OpFunctionParameter %_ptr_CrossWorkgroup_ulong + %entry = OpLabel + %call = OpExtInst %ulong %1 s_upsample %a %b + OpStore %c %call Aligned 8 + %call1 = OpExtInst %ulong %1 u_upsample %a %b + %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ulong %c %uint_1 + OpStore %arrayidx2 %call1 Aligned 8 + OpReturn + OpFunctionEnd + %20 = OpFunction %void None %19 + %call2 = OpFunctionParameter %v4uint + %b_0 = OpFunctionParameter %v4uint + %c_0 = OpFunctionParameter %_ptr_CrossWorkgroup_v4ulong + %entry_0 = OpLabel + %call1_0 = OpExtInst %v4ulong %1 s_upsample %call2 %b_0 + OpStore %c_0 %call1_0 Aligned 32 + %call3 = OpExtInst %v4ulong %1 u_upsample %call2 %b_0 + %arrayidx4 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4ulong %c_0 %uint_1 + OpStore %arrayidx4 %call3 Aligned 32 + OpReturn + OpFunctionEnd diff --git a/llvm-spirv/test/mangling_upsample.spt b/llvm-spirv/test/mangling_upsample.spt deleted file mode 100644 index 70d42f575dc1d..0000000000000 --- a/llvm-spirv/test/mangling_upsample.spt +++ /dev/null @@ -1,33 +0,0 @@ -119734787 65536 393230 12 0 -2 Capability Addresses -2 Capability Kernel -2 Capability Int64 -5 ExtInstImport 1 "OpenCL.std" -3 MemoryModel 2 2 -10 EntryPoint 6 4 "_ZTSZ4mainE11fake_kernel" -13 String 11 "kernel_arg_type._ZTSZ4mainE11fake_kernel." -3 Source 4 100000 -4 Name 5 "entry" -6 Name 10 "call2.i.i.i.i" -4 TypeInt 6 32 0 -4 TypeInt 9 64 0 -4 Constant 6 7 0 -4 Constant 6 8 1 -2 TypeVoid 2 -3 TypeFunction 3 2 - - -5 Function 2 4 0 3 - -2 Label 5 -7 ExtInst 9 10 1 s_upsample 7 8 -1 Return - -1 FunctionEnd - -; RUN: llvm-spirv %s -to-binary -o %t.spv -; RUN: spirv-val %t.spv -; RUN: llvm-spirv -r %t.spv -o %t.bc -; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-LLVM: upsampleij From 1321600eb0c9a70930d25a79e736c95f6885c2f5 Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Thu, 8 Oct 2020 20:03:43 +0300 Subject: [PATCH 544/544] [SYCL] Fix conflict resolution issue from cdabc426 --- libclc/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 1054f37d38790..f5e741838cf59 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -198,19 +198,19 @@ file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/generic/libspirv/gen_convert.py add_custom_command( OUTPUT convert-core.cl - COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert-core.cl + COMMAND ${Python3_EXECUTABLE} ${core_script_loc} > convert-core.cl DEPENDS ${core_script_loc} ) add_custom_target( "generate_convert_core.cl" DEPENDS convert-core.cl ) add_custom_command( OUTPUT convert-spirv.cl - COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert-spirv.cl + COMMAND ${Python3_EXECUTABLE} ${spirv_script_loc} > convert-spirv.cl DEPENDS ${spirv_script_loc} ) add_custom_target( "generate_convert_spirv.cl" DEPENDS convert-spirv.cl ) add_custom_command( OUTPUT convert-clc.cl - COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert-clc.cl + COMMAND ${Python3_EXECUTABLE} ${clc_script_loc} > convert-clc.cl DEPENDS ${clc_script_loc} ) add_custom_target( "generate_convert_clc.cl" DEPENDS convert-clc.cl )