diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d7a0a29d6ed55..c07e9f043b7b8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8578,8 +8578,8 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, } if (auto *CI = dyn_cast(Instr)) { - return toVPRecipeResult( - new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI)); + return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0], + CI->getType(), *CI)); } return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan)); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 639c2ac685a66..265493eb105a0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -829,6 +829,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase { PossiblyExactOp, GEPOp, FPMathOp, + NonNegOp, Other }; @@ -847,6 +848,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { struct GEPFlagsTy { char IsInBounds : 1; }; + struct NonNegFlagsTy { + char NonNeg : 1; + }; struct FastMathFlagsTy { char AllowReassoc : 1; char NoNaNs : 1; @@ -866,6 +870,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase { WrapFlagsTy WrapFlags; ExactFlagsTy ExactFlags; GEPFlagsTy GEPFlags; + NonNegFlagsTy NonNegFlags; FastMathFlagsTy FMFs; unsigned AllFlags; }; @@ -893,6 +898,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { } else if (auto *GEP = dyn_cast(&I)) { OpType = OperationType::GEPOp; GEPFlags.IsInBounds = GEP->isInBounds(); + } else if (auto *PNNI = dyn_cast(&I)) { + OpType = OperationType::NonNegOp; + NonNegFlags.NonNeg = PNNI->hasNonNeg(); } else if (auto *Op = dyn_cast(&I)) { OpType = OperationType::FPMathOp; FMFs = Op->getFastMathFlags(); @@ -921,6 +929,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase { return R->getVPDefID() == VPRecipeBase::VPInstructionSC || R->getVPDefID() == VPRecipeBase::VPWidenSC || R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || + R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC; } @@ -943,6 +952,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { FMFs.NoNaNs = false; FMFs.NoInfs = false; break; + case OperationType::NonNegOp: + NonNegFlags.NonNeg = false; + break; case OperationType::Cmp: case OperationType::Other: break; @@ -971,6 +983,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase { I->setHasAllowContract(FMFs.AllowContract); I->setHasApproxFunc(FMFs.ApproxFunc); break; + case OperationType::NonNegOp: + I->setNonNeg(NonNegFlags.NonNeg); + break; case OperationType::Cmp: case OperationType::Other: break; @@ -1177,7 +1192,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue { }; /// VPWidenCastRecipe is a recipe to create vector cast instructions. -class VPWidenCastRecipe : public VPRecipeBase, public VPValue { +class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue { /// Cast instruction opcode. Instruction::CastOps Opcode; @@ -1186,15 +1201,19 @@ class VPWidenCastRecipe : public VPRecipeBase, public VPValue { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - CastInst *UI = nullptr) - : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI), + CastInst &UI) + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI), Opcode(Opcode), ResultTy(ResultTy) { - assert((!UI || UI->getOpcode() == Opcode) && + assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); - assert((!UI || UI->getType() == ResultTy) && + assert(UI.getType() == ResultTy && "result type of underlying cast doesn't match"); } + VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr), + Opcode(Opcode), ResultTy(ResultTy) {} + ~VPWidenCastRecipe() override = default; VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f73487ffb6a7e..462cd201b6fcc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -652,6 +652,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { if (GEPFlags.IsInBounds) O << " inbounds"; break; + case OperationType::NonNegOp: + if (NonNegFlags.NonNeg) + O << " nneg"; + break; case OperationType::Other: break; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2baaf08cd0544..ea90ed4a21b1a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -81,7 +81,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes( NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); } else if (auto *CI = dyn_cast(Inst)) { NewRecipe = new VPWidenCastRecipe( - CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI); + CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI); } else { NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands()); } diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 5694367dd1f90..bc5f137564b37 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -268,6 +268,84 @@ loop.exit: ret void } +define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { +; CHECK-LABEL: define void @drop_zext_nneg( +; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison) +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3 +; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[TMP8]] to i64 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]] +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]] +; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[IDX2]], align 8 +; CHECK-NEXT: br label [[ELSE]] +; CHECK: else: +; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ] +; CHECK-NEXT: store double [[PHI]], ptr [[P1]], align 8 +; CHECK-NEXT: [[NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %body + +body: + %iv = phi i64 [ %next, %else ], [ 0, %entry ] + %0 = trunc i64 %iv to i32 + %c = icmp eq i32 %0, 0 + br i1 %c, label %then, label %else + +then: + %zext = zext nneg i32 %0 to i64 + %idx1 = getelementptr double, ptr %p, i64 %zext + %idx2 = getelementptr double, ptr %p, i64 %zext + %1 = load double, ptr %idx2, align 8 + br label %else + +else: + %phi = phi double [ %1, %then ], [ 0.000000e+00, %body ] + store double %phi, ptr %p1, align 8 + %next = add i64 %iv, 1 + %cmp = icmp eq i64 %next, 0 + br i1 %cmp, label %exit, label %body + +exit: + ret void +} + ; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather. define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 { diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 6cc69a808f461..5a7c3dcc8b686 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -806,6 +806,47 @@ exit: ret void } +define void @zext_nneg(ptr noalias %p, ptr noalias %p1) { +; CHECK-LABEL: LV: Checking a loop in 'zext_nneg' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: Live-in ir<0> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%1> = CANONICAL-INDUCTION ir<0>, vp<%8> +; CHECK-NEXT: vp<%2> = DERIVED-IV ir<0> + vp<%1> * ir<1> (truncated to i32) +; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> +; CHECK-NEXT: CLONE ir<%zext> = zext nneg vp<%3> +; CHECK-NEXT: CLONE ir<%idx2> = getelementptr ir<%p>, ir<%zext> +; CHECK-NEXT: WIDEN ir<%1> = load ir<%idx2> +; CHECK-NEXT: REPLICATE store ir<%1>, ir<%p1> +; CHECK-NEXT: EMIT vp<%8> = VF * UF + nuw vp<%1> +; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%0> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %body + +body: + %iv = phi i64 [ %next, %body ], [ 0, %entry ] + %0 = trunc i64 %iv to i32 + %zext = zext nneg i32 %0 to i64 + %idx2 = getelementptr double, ptr %p, i64 %zext + %1 = load double, ptr %idx2, align 8 + store double %1, ptr %p1, align 8 + %next = add i64 %iv, 1 + %cmp = icmp eq i64 %next, 0 + br i1 %cmp, label %exit, label %body + +exit: + ret void +} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4}