diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index e4254d1e64bec..f64165e2e4bc6 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -1727,7 +1727,7 @@ extern "C" __device__ double test_j1(double x) { // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] // FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]] // FINITEONLY: _ZL3jnfif.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret float [[RETVAL_0_I]] // // APPROX-LABEL: @test_jnf( @@ -1830,7 +1830,7 @@ extern "C" __device__ float test_jnf(int x, float y) { // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] // FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] // FINITEONLY: _ZL2jnid.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret double [[RETVAL_0_I]] // // APPROX-LABEL: @test_jn( @@ -4461,7 +4461,7 @@ extern "C" __device__ double test_y1(double x) { // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] // FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop 
[[LOOP24:![0-9]+]] // FINITEONLY: _ZL3ynfif.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret float [[RETVAL_0_I]] // // APPROX-LABEL: @test_ynf( @@ -4564,7 +4564,7 @@ extern "C" __device__ float test_ynf(int x, float y) { // FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] // FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] // FINITEONLY: _ZL2ynid.exit: -// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // FINITEONLY-NEXT: ret double [[RETVAL_0_I]] // // APPROX-LABEL: @test_yn( diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 656bb1ebd1161..aa3f825265f81 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -394,6 +394,12 @@ struct PromoteMem2Reg { /// Whether the function has the no-signed-zeros-fp-math attribute set. bool NoSignedZeros = false; + /// Whether the function has the no-nans-fp-math attribute set. + bool NoNaNs = false; + + /// Whether the function has the no-infs-fp-math attribute set. 
+ bool NoInfs = false; + public: PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, AssumptionCache *AC) @@ -752,6 +758,8 @@ void PromoteMem2Reg::run() { ForwardIDFCalculator IDF(DT); NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool(); + NoNaNs = F.getFnAttribute("no-nans-fp-math").getValueAsBool(); + NoInfs = F.getFnAttribute("no-infs-fp-math").getValueAsBool(); for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -1132,13 +1140,24 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); - // For the sequence `return X > 0.0 ? X : -X`, it is expected that this - // results in fabs intrinsic. However, without no-signed-zeros(nsz) flag - // on the phi node generated at this stage, fabs folding does not - // happen. So, we try to infer nsz flag from the function attributes to - // enable this fabs folding. - if (isa<FPMathOperator>(APN) && NoSignedZeros) - APN->setHasNoSignedZeros(true); + if (isa<FPMathOperator>(APN)) { + // For the sequence `return X > 0.0 ? X : -X`, it is expected that + // this results in fabs intrinsic. However, without + // no-signed-zeros(nsz) flag on the phi node generated at this stage, + // fabs folding does not happen. So, we try to infer nsz flag from the + // function attributes to enable this fabs folding. + if (NoSignedZeros) + APN->setHasNoSignedZeros(true); + + // This allows select instruction folding relevant to floating point + // reductions whose operand is a PHI. + if (NoNaNs) + APN->setHasNoNaNs(true); + + // Handle NoInfs flag too. + if (NoInfs) + APN->setHasNoInfs(true); + } // The currently active variable for this block is now the PHI. 
IncomingVals[AllocaNo] = APN; diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll index 2cc26363daf9c..c669586b4423e 100644 --- a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll +++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll @@ -77,3 +77,137 @@ return: ; preds = %entry,%if.then %retval = load double, ptr %x.addr ret double %retval } + +define double @phi_with_nnan(double %x) "no-nans-fp-math"="true" { +; CHECK-LABEL: define double @phi_with_nnan( +; CHECK-SAME: double [[X:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nnan double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[X_ADDR_0]] +; +entry: + %x.addr = alloca double + %cmp = fcmp olt double %x, 0.0 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %fneg = fneg double %x + store double %fneg, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load double, ptr %x.addr + ret double %retval +} + +define <2 x double> @vector_phi_with_nnan(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-nans-fp-math"="true" { +; CHECK-LABEL: define <2 x double> @vector_phi_with_nnan( +; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nnan <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]] +; 
+entry: + %x.addr = alloca <2 x double> + store <2 x double> %a, ptr %x.addr + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + store <2 x double> %b, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load <2 x double>, ptr %x.addr + ret <2 x double> %retval +} + +define double @phi_without_nnan(double %x) "no-nans-fp-math"="false" { +; CHECK-LABEL: define double @phi_without_nnan( +; CHECK-SAME: double [[X:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[X_ADDR_0]] +; +entry: + %x.addr = alloca double + %cmp = fcmp olt double %x, 0.0 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %fneg = fneg double %x + store double %fneg, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load double, ptr %x.addr + ret double %retval +} + +define <2 x double> @vector_phi_with_ninf(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-infs-fp-math"="true" { +; CHECK-LABEL: define <2 x double> @vector_phi_with_ninf( +; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi ninf <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]] +; +entry: + %x.addr = alloca <2 x double> + store <2 x double> %a, ptr %x.addr + br i1 %cmp, label %if.then, label %return + +if.then: ; 
preds = %entry + store <2 x double> %b, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load <2 x double>, ptr %x.addr + ret <2 x double> %retval +} + +define double @phi_without_ninf(double %x) "no-infs-fp-math"="false" { +; CHECK-LABEL: define double @phi_without_ninf( +; CHECK-SAME: double [[X:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[X_ADDR_0]] +; +entry: + %x.addr = alloca double + %cmp = fcmp olt double %x, 0.0 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %fneg = fneg double %x + store double %fneg, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load double, ptr %x.addr + ret double %retval +}