diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index bf958e100f2ac..2b9be43eadb7a 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2243,6 +2243,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::log2: ISD = ISD::FLOG2; break; + case Intrinsic::ldexp: + ISD = ISD::FLDEXP; + break; case Intrinsic::fabs: ISD = ISD::FABS; break; @@ -2297,6 +2300,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::roundeven: ISD = ISD::FROUNDEVEN; break; + case Intrinsic::lround: + ISD = ISD::LROUND; + break; + case Intrinsic::llround: + ISD = ISD::LLROUND; + break; case Intrinsic::pow: ISD = ISD::FPOW; break; diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 63fccee63c0ae..a391e92e84fc6 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::exp: case Intrinsic::exp10: case Intrinsic::exp2: + case Intrinsic::ldexp: case Intrinsic::log: case Intrinsic::log10: case Intrinsic::log2: @@ -108,6 +109,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::canonicalize: case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: + case Intrinsic::lround: + case Intrinsic::llround: case Intrinsic::lrint: case Intrinsic::llrint: case Intrinsic::ucmp: @@ -189,6 +192,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg( switch (ID) { case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: + case Intrinsic::lround: + case Intrinsic::llround: case Intrinsic::lrint: case Intrinsic::llrint: case Intrinsic::vp_lrint: @@ -203,6 +208,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg( case Intrinsic::vp_is_fpclass: return OpdIdx == 0; case Intrinsic::powi: + case Intrinsic::ldexp: return OpdIdx == -1 || OpdIdx == 1; default: return OpdIdx == -1; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/ldexp.ll b/llvm/test/Analysis/CostModel/AMDGPU/ldexp.ll index 2b1b5906ad017..260cf696270cd 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/ldexp.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/ldexp.ll @@ -13,45 +13,45 @@ define void @ldexp_f16_i32() { ; GFX7-LABEL: 'ldexp_f16_i32' ; GFX7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> undef, <2 x i32> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> undef, <4 x i32> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> undef, <8 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> undef, <16 x i32> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX8PLUS-LABEL: 'ldexp_f16_i32' -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> undef, <2 x i32> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> undef, <4 x i32> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> undef, <8 x i32> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> undef, <16 x i32> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> undef, <2 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> undef, <4 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> undef, <8 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> undef, <16 x i32> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) ; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'ldexp_f16_i32' ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> undef, <2 x i32> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> undef, <4 x i32> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> undef, <8 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> undef, <16 x i32> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GFX8PLUS-SIZE-LABEL: 'ldexp_f16_i32' -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> undef, <2 x i32> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> undef, <4 x i32> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> undef, <8 x i32> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> undef, <16 x i32> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> undef, <2 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> undef, <3 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> undef, <4 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i32(<5 x half> undef, <5 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> undef, <8 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> undef, <16 x i32> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i32(<17 x half> undef, <17 x i32> undef) ; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = call half @llvm.ldexp.f16.i32(half undef, i32 undef) @@ -69,45 +69,45 @@ define void @ldexp_f16_i16() { ; GFX7-LABEL: 'ldexp_f16_i16' ; GFX7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> undef, <2 x i16> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> undef, <4 x i16> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> undef, <8 x i16> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> undef, <16 x i16> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX8PLUS-LABEL: 'ldexp_f16_i16' -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> undef, <2 x i16> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> undef, <4 x i16> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> undef, <8 x i16> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> undef, <16 x i16> undef) -; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> undef, <2 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> undef, <4 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> undef, <8 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> undef, <16 x i16> undef) +; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) ; GFX8PLUS-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'ldexp_f16_i16' ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> undef, <2 x i16> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> undef, <4 x i16> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> undef, <8 x i16> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> undef, <16 x i16> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GFX8PLUS-SIZE-LABEL: 'ldexp_f16_i16' -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> undef, <2 x i16> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> undef, <4 x i16> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> undef, <8 x i16> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> undef, <16 x i16> undef) -; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f16 = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> undef, <2 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> undef, <3 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> undef, <4 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = call <5 x half> @llvm.ldexp.v5f16.v5i16(<5 x half> undef, <5 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> undef, <8 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half> undef, <16 x i16> undef) +; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v17f16 = call <17 x half> @llvm.ldexp.v17f16.v17i16(<17 x half> undef, <17 x i16> undef) ; GFX8PLUS-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = call half @llvm.ldexp.f16.i16(half undef, i16 undef) @@ -125,12 +125,12 @@ define void @ldexp_bf16() { ; GFX7-LABEL: 'ldexp_bf16' ; GFX7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.ldexp.bf16.i32(bfloat undef, i32 undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2bf16 = call <2 x bfloat> @llvm.ldexp.v2bf16.v2i32(<2 x bfloat> undef, <2 x i32> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3bf16 = call <3 x bfloat> @llvm.ldexp.v3bf16.v3i32(<3 x bfloat> undef, <3 x i32> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3bf16 = call <3 x bfloat> @llvm.ldexp.v3bf16.v3i32(<3 x bfloat> undef, <3 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4bf16 = call <4 x bfloat> @llvm.ldexp.v4bf16.v4i32(<4 x bfloat> undef, <4 x i32> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5bf16 = call <5 x bfloat> @llvm.ldexp.v5bf16.v5i32(<5 x bfloat> undef, <5 x i32> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5bf16 = call <5 x bfloat> @llvm.ldexp.v5bf16.v5i32(<5 x bfloat> undef, <5 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8bf16 = call <8 x bfloat> @llvm.ldexp.v8bf16.v8i32(<8 x bfloat> undef, <8 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16bf16 = call <16 x bfloat> @llvm.ldexp.v16bf16.v16i32(<16 x bfloat> undef, <16 x i32> undef) -; GFX7-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17bf16 = call <17 x bfloat> @llvm.ldexp.v17bf16.v17i32(<17 x bfloat> undef, <17 x i32> undef) +; GFX7-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17bf16 = call <17 x bfloat> @llvm.ldexp.v17bf16.v17i32(<17 x bfloat> undef, <17 x i32> undef) ; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX8PLUS-LABEL: 'ldexp_bf16' @@ -147,12 +147,12 @@ define void @ldexp_bf16() { ; GFX7-SIZE-LABEL: 'ldexp_bf16' ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = call bfloat @llvm.ldexp.bf16.i32(bfloat undef, i32 undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2bf16 = call <2 x bfloat> @llvm.ldexp.v2bf16.v2i32(<2 x bfloat> undef, <2 x i32> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3bf16 = call <3 x bfloat> @llvm.ldexp.v3bf16.v3i32(<3 x bfloat> undef, <3 x i32> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3bf16 = call <3 x bfloat> @llvm.ldexp.v3bf16.v3i32(<3 x bfloat> undef, <3 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4bf16 = call <4 x bfloat> @llvm.ldexp.v4bf16.v4i32(<4 x bfloat> undef, <4 x i32> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5bf16 = call <5 x bfloat> @llvm.ldexp.v5bf16.v5i32(<5 x bfloat> undef, <5 x i32> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5bf16 = call <5 x bfloat> @llvm.ldexp.v5bf16.v5i32(<5 x bfloat> undef, <5 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8bf16 = call <8 x bfloat> @llvm.ldexp.v8bf16.v8i32(<8 x bfloat> undef, <8 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16bf16 = call <16 x bfloat> @llvm.ldexp.v16bf16.v16i32(<16 x bfloat> undef, <16 x i32> undef) -; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17bf16 = call <17 x bfloat> @llvm.ldexp.v17bf16.v17i32(<17 x bfloat> undef, <17 x i32> undef) +; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17bf16 = call <17 x bfloat> @llvm.ldexp.v17bf16.v17i32(<17 x bfloat> undef, <17 x i32> undef) ; GFX7-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GFX8PLUS-SIZE-LABEL: 'ldexp_bf16' diff --git a/llvm/test/Analysis/CostModel/RISCV/exp.ll b/llvm/test/Analysis/CostModel/RISCV/exp.ll index fa7a9451f567a..55c3f5554eff0 100644 --- a/llvm/test/Analysis/CostModel/RISCV/exp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/exp.ll @@ -3,51 +3,51 @@ define void @ldexp() { ; CHECK-LABEL: 'ldexp' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.ldexp.bf16.i32(bfloat poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x bfloat> @llvm.ldexp.v2bf16.v2i32(<2 x bfloat> poison, <2 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x bfloat> @llvm.ldexp.v4bf16.v4i32(<4 x bfloat> poison, <4 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <8 x bfloat> @llvm.ldexp.v8bf16.v8i32(<8 x bfloat> poison, <8 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %5 = call <16 x bfloat> @llvm.ldexp.v16bf16.v16i32(<16 x bfloat> poison, <16 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call bfloat @llvm.ldexp.bf16.i32(bfloat poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %2 = call <2 x bfloat> @llvm.ldexp.v2bf16.v2i32(<2 x bfloat> poison, <2 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %3 = call <4 x bfloat> @llvm.ldexp.v4bf16.v4i32(<4 x bfloat> poison, <4 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %4 = call <8 x bfloat> @llvm.ldexp.v8bf16.v8i32(<8 x bfloat> poison, <8 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x bfloat> @llvm.ldexp.v16bf16.v16i32(<16 x bfloat> poison, <16 x i32> poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.ldexp.nxv1bf16.nxv1i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.ldexp.nxv2bf16.nxv2i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.ldexp.nxv4bf16.nxv4i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.ldexp.nxv8bf16.nxv8i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.ldexp.nxv16bf16.nxv16i32( poison, poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call float @llvm.ldexp.f32.i32(float poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> poison, <2 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> poison, <4 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> poison, <8 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float> poison, <16 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call float @llvm.ldexp.f32.i32(float poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %12 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> poison, <2 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %13 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> poison, <4 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> poison, <8 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float> poison, <16 x i32> poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.ldexp.nxv1f32.nxv1i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.ldexp.nxv2f32.nxv2i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.ldexp.nxv4f32.nxv4i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %19 = call @llvm.ldexp.nxv8f32.nxv8i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %20 = call @llvm.ldexp.nxv16f32.nxv16i32( poison, poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call double @llvm.ldexp.f64.i32(double poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> poison, <2 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %23 = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> poison, <4 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %24 = call <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double> poison, <8 x i32> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %25 = call <16 x double> @llvm.ldexp.v16f64.v16i32(<16 x double> poison, <16 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %21 = call double @llvm.ldexp.f64.i32(double poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %22 = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> poison, <2 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %23 = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> poison, <4 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %24 = call <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double> poison, <8 x i32> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %25 = call <16 x double> @llvm.ldexp.v16f64.v16i32(<16 x double> poison, <16 x i32> poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %26 = call @llvm.ldexp.nxv1f64.nxv1i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %27 = call @llvm.ldexp.nxv2f64.nxv2i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %28 = call @llvm.ldexp.nxv4f64.nxv4i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %29 = call @llvm.ldexp.nxv8f64.nxv8i32( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %30 = call @llvm.ldexp.nxv16f64.nxv16i32( poison, poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call float @llvm.ldexp.f32.i64(float poison, i64 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float> poison, <2 x i64> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %33 = call <4 x float> @llvm.ldexp.v4f32.v4i64(<4 x float> poison, <4 x i64> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %34 = call <8 x float> @llvm.ldexp.v8f32.v8i64(<8 x float> poison, <8 x i64> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %35 = call <16 x float> @llvm.ldexp.v16f32.v16i64(<16 x float> poison, <16 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %31 = call float @llvm.ldexp.f32.i64(float poison, i64 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %32 = call <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float> poison, <2 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %33 = call <4 x float> @llvm.ldexp.v4f32.v4i64(<4 x float> poison, <4 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %34 = call <8 x float> @llvm.ldexp.v8f32.v8i64(<8 x float> poison, <8 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %35 = call <16 x float> @llvm.ldexp.v16f32.v16i64(<16 x float> poison, <16 x i64> poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %36 = call @llvm.ldexp.nxv1f32.nxv1i64( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.ldexp.nxv2f32.nxv2i64( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %38 = call @llvm.ldexp.nxv4f32.nxv4i64( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.ldexp.nxv8f32.nxv8i64( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %40 = call @llvm.ldexp.nxv16f32.nxv16i64( poison, poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = call double @llvm.ldexp.f64.i64(double poison, i64 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %42 = call <2 x double> @llvm.ldexp.v2f64.v2i64(<2 x double> poison, <2 x i64> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %43 = call <4 x double> @llvm.ldexp.v4f64.v4i64(<4 x double> poison, <4 x i64> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %44 = call <8 x double> @llvm.ldexp.v8f64.v8i64(<8 x double> poison, <8 x i64> poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %45 = call <16 x double> @llvm.ldexp.v16f64.v16i64(<16 x double> poison, <16 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %41 = call double @llvm.ldexp.f64.i64(double poison, i64 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %42 = call <2 x double> @llvm.ldexp.v2f64.v2i64(<2 x double> poison, <2 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %43 = call <4 x double> @llvm.ldexp.v4f64.v4i64(<4 x double> poison, <4 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %44 = call <8 x double> @llvm.ldexp.v8f64.v8i64(<8 x double> poison, <8 x i64> poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %45 = call <16 x double> @llvm.ldexp.v16f64.v16i64(<16 x double> poison, <16 x i64> poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %46 = call @llvm.ldexp.nxv1f64.nxv1i64( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %47 = call @llvm.ldexp.nxv2f64.nxv2i64( poison, poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %48 = call @llvm.ldexp.nxv4f64.nxv4i64( poison, poison) diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll index 9c910d70807a1..10d83a456d0e2 100644 --- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll @@ -324,6 +324,56 @@ for.end: ; preds = %for.body, %entry declare double @llvm.exp2.f64(double) +define void @ldexp_f32i32(i32 %n, ptr %y, ptr %x, i32 %exp) { +; CHECK-LABEL: @ldexp_f32i32( +; CHECK: llvm.ldexp.v4f32.v4i32 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call float @llvm.ldexp.f32.i32(float %0, i32 %exp) + %arrayidx2 = getelementptr inbounds float, ptr %x, i32 %iv + store float %call, ptr %arrayidx2, align 4 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.ldexp.f32.i32(float, i32) + +define void @ldexp_f64i32(i32 %n, ptr %y, ptr %x, i32 %exp) { +; CHECK-LABEL: @ldexp_f64i32( +; CHECK: llvm.ldexp.v4f64.v4i32 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv + %0 = load double, ptr %arrayidx, align 8 + %call = tail call double @llvm.ldexp.f64.i32(double %0, i32 %exp) + %arrayidx2 = getelementptr inbounds double, ptr %x, i32 %iv + store double %call, ptr %arrayidx2, align 8 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.ldexp.f64.i32(double, i32) + define void @log_f32(i32 %n, ptr %y, ptr %x) { ; CHECK-LABEL: @log_f32( ; CHECK: llvm.log.v4f32 @@ -976,6 +1026,157 @@ for.end: ; preds = %for.body, %entry declare double @llvm.roundeven.f64(double) + +define void @lround_i32f32(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @lround_i32f32( +; CHECK: llvm.lround.v4i32.v4f32 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call i32 @llvm.lround.i32.f32(float %0) + %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv + store i32 %call, ptr %arrayidx2, align 4 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i32 @llvm.lround.i32.f32(float) + +define void @lround_i32f64(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @lround_i32f64( +; CHECK: llvm.lround.v4i32.v4f64 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv + %0 = load double, ptr %arrayidx, align 8 + %call = tail call i32 @llvm.lround.i32.f64(double %0) + %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv + store i32 %call, ptr %arrayidx2, align 8 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i32 @llvm.lround.i32.f64(double) + +define void @lround_i64f32(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @lround_i64f32( +; CHECK: llvm.lround.v4i64.v4f32 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call i64 @llvm.lround.i64.f32(float %0) + %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv + store i64 %call, ptr %arrayidx2, align 4 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.lround.i64.f32(float) + +define void @lround_i64f64(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @lround_i64f64( +; CHECK: llvm.lround.v4i64.v4f64 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv + %0 = load double, ptr %arrayidx, align 8 + %call = tail call i64 @llvm.lround.i64.f64(double %0) + %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv + store i64 %call, ptr %arrayidx2, align 8 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.lround.i64.f64(double) + +define void @llround_i64f32(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @llround_i64f32( +; CHECK: llvm.llround.v4i64.v4f32 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call i64 @llvm.llround.i64.f32(float %0) + %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv + store i64 %call, ptr %arrayidx2, align 4 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.llround.i64.f32(float) + +define void @llround_i64f64(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @llround_i64f64( +; CHECK: llvm.llround.v4i64.v4f64 +; CHECK: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv + %0 = load double, ptr %arrayidx, align 8 + %call = tail call i64 @llvm.llround.i64.f64(double %0) + %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv + store i64 %call, ptr %arrayidx2, align 8 + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.llround.i64.f64(double) + define void @fma_f32(i32 %n, ptr %y, ptr %x, ptr %z, ptr %w) { ; CHECK-LABEL: @fma_f32( ; CHECK: llvm.fma.v4f32 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll new file mode 100644 index 0000000000000..dcb80c9f0e0a9 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +define void @ldexp_f32i32(ptr %x, ptr %y, i32 %exp, i32 %n) { +; CHECK-LABEL: @ldexp_f32i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L0]], i32 [[EXP:%.*]]) +; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L2]], i32 [[EXP]]) +; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L4]], i32 [[EXP]]) +; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L6]], i32 [[EXP]]) +; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1 +; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2 +; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3 +; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load float, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1 + %l2 = load float, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2 + %l4 = load float, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3 + %l6 = load float, ptr %arrayidx.3, align 4 + %l1 = tail call float @llvm.ldexp.f32.i32(float %l0, i32 %exp) + %l3 = tail call float @llvm.ldexp.f32.i32(float %l2, i32 %exp) + %l5 = tail call float @llvm.ldexp.f32.i32(float %l4, i32 %exp) + %l7 = tail call float @llvm.ldexp.f32.i32(float %l6, i32 %exp) + store float %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1 + store float %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2 + store float %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3 + store float %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @ldexp_f64i32(ptr %x, ptr %y, i32 %exp, i32 %n) { +; CHECK-LABEL: @ldexp_f64i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L0]], i32 [[EXP:%.*]]) +; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L2]], i32 [[EXP]]) +; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L4]], i32 [[EXP]]) +; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L6]], i32 [[EXP]]) +; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1 +; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2 +; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3 +; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load double, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1 + %l2 = load double, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2 + %l4 = load double, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3 + %l6 = load double, ptr %arrayidx.3, align 4 + %l1 = tail call double @llvm.ldexp.f64.i32(double %l0, i32 %exp) + %l3 = tail call double @llvm.ldexp.f64.i32(double %l2, i32 %exp) + %l5 = tail call double @llvm.ldexp.f64.i32(double %l4, i32 %exp) + %l7 = tail call double @llvm.ldexp.f64.i32(double %l6, i32 %exp) + store double %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1 + store double %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2 + store double %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3 + store double %l7, ptr %arrayidx2.3, align 4 + ret void +} + +declare float @llvm.ldexp.f32.i32(float, i32) +declare double @llvm.ldexp.f64.i32(double, i32) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll new file mode 100644 index 0000000000000..07a3fe7d0bbc5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll @@ -0,0 +1,280 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +define void @lround_i32f32(ptr %x, ptr %y, i32 %n) { +; CHECK-LABEL: @lround_i32f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L0]]) +; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L2]]) +; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L4]]) +; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L6]]) +; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1 +; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2 +; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3 +; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load float, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1 + %l2 = load float, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2 + %l4 = load float, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3 + %l6 = load float, ptr %arrayidx.3, align 4 + %l1 = tail call i32 @llvm.lround.i32.f32(float %l0) + %l3 = tail call i32 @llvm.lround.i32.f32(float %l2) + %l5 = tail call i32 @llvm.lround.i32.f32(float %l4) + %l7 = tail call i32 @llvm.lround.i32.f32(float %l6) + store i32 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1 + store i32 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2 + store i32 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3 + store i32 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @lround_i32f64(ptr %x, ptr %y, i32 %n) { +; CHECK-LABEL: @lround_i32f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L0]]) +; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L2]]) +; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L4]]) +; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L6]]) +; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1 +; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2 +; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3 +; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load double, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1 + %l2 = load double, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2 + %l4 = load double, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3 + %l6 = load double, ptr %arrayidx.3, align 4 + %l1 = tail call i32 @llvm.lround.i32.f64(double %l0) + %l3 = tail call i32 @llvm.lround.i32.f64(double %l2) + %l5 = tail call i32 @llvm.lround.i32.f64(double %l4) + %l7 = tail call i32 @llvm.lround.i32.f64(double %l6) + store i32 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1 + store i32 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2 + store i32 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3 + store i32 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @lround_i64f32(ptr %x, ptr %y, i64 %n) { +; CHECK-LABEL: @lround_i64f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L0]]) +; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L2]]) +; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L4]]) +; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L6]]) +; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1 +; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2 +; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3 +; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load float, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1 + %l2 = load float, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2 + %l4 = load float, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3 + %l6 = load float, ptr %arrayidx.3, align 4 + %l1 = tail call i64 @llvm.lround.i64.f32(float %l0) + %l3 = tail call i64 @llvm.lround.i64.f32(float %l2) + %l5 = tail call i64 @llvm.lround.i64.f32(float %l4) + %l7 = tail call i64 @llvm.lround.i64.f32(float %l6) + store i64 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1 + store i64 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2 + store i64 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3 + store i64 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @lround_i64f64(ptr %x, ptr %y, i64 %n) { +; CHECK-LABEL: @lround_i64f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L0]]) +; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L2]]) +; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L4]]) +; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L6]]) +; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1 +; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2 +; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3 +; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load double, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1 + %l2 = load double, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2 + %l4 = load double, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3 + %l6 = load double, ptr %arrayidx.3, align 4 + %l1 = tail call i64 @llvm.lround.i64.f64(double %l0) + %l3 = tail call i64 @llvm.lround.i64.f64(double %l2) + %l5 = tail call i64 @llvm.lround.i64.f64(double %l4) + %l7 = tail call i64 @llvm.lround.i64.f64(double %l6) + store i64 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1 + store i64 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2 + store i64 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3 + store i64 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @llround_i64f32(ptr %x, ptr %y, i64 %n) { +; CHECK-LABEL: @llround_i64f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L0]]) +; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L2]]) +; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L4]]) +; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L6]]) +; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1 +; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2 +; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3 +; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load float, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1 + %l2 = load float, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2 + %l4 = load float, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3 + %l6 = load float, ptr %arrayidx.3, align 4 + %l1 = tail call i64 @llvm.llround.i64.f32(float %l0) + %l3 = tail call i64 @llvm.llround.i64.f32(float %l2) + %l5 = tail call i64 @llvm.llround.i64.f32(float %l4) + %l7 = tail call i64 @llvm.llround.i64.f32(float %l6) + store i64 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1 + store i64 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2 + store i64 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3 + store i64 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @llround_i64f64(ptr %x, ptr %y, i64 %n) { +; CHECK-LABEL: @llround_i64f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1 +; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2 +; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3 +; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L0]]) +; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L2]]) +; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L4]]) +; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L6]]) +; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1 +; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2 +; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3 +; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load double, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1 + %l2 = load double, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2 + %l4 = load double, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3 + %l6 = load double, ptr %arrayidx.3, align 4 + %l1 = tail call i64 @llvm.llround.i64.f64(double %l0) + %l3 = tail call i64 @llvm.llround.i64.f64(double %l2) + %l5 = tail call i64 @llvm.llround.i64.f64(double %l4) + %l7 = tail call i64 @llvm.llround.i64.f64(double %l6) + store i64 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1 + store i64 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2 + store i64 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3 + store i64 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +declare i32 @llvm.lround.i32.f32(float) +declare i64 @llvm.lround.i64.f32(float) +declare i64 @llvm.lround.i64.f64(double) +declare i64 @llvm.llround.i64.f32(float) +declare i64 @llvm.llround.i64.f64(double) diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll index cee44ef434260..070c765294ba1 100644 --- a/llvm/test/Transforms/Scalarizer/intrinsics.ll +++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll @@ -8,6 +8,7 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>) declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) ; Ternary fp declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) @@ -32,6 +33,8 @@ declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>) ; Unary fp operand, int return type declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float>) declare <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float>) +declare <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float>) +declare <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float>) ; Bool return type, overloaded on fp operand type declare <2 x i1> @llvm.is.fpclass(<2 x float>, i32) @@ -159,6 +162,22 @@ define <2 x float> @scalarize_powi_v2f32(<2 x float> %x, i32 %y) #0 { ret <2 x float> %powi } +define <2 x float> @scalarize_ldexp_v2f32(<2 x float> %x, <2 x i32> %y) #0 { +; CHECK-LABEL: @scalarize_ldexp_v2f32( +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[Y:%.*]] = extractelement <2 x i32> [[Y1:%.*]], i64 0 +; CHECK-NEXT: [[POWI_I0:%.*]] = call float @llvm.ldexp.f32.i32(float [[X_I0]], i32 [[Y]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x i32> [[Y1]], i64 1 +; CHECK-NEXT: [[POWI_I1:%.*]] = call float @llvm.ldexp.f32.i32(float [[X_I1]], i32 [[Y_I1]]) +; CHECK-NEXT: [[POWI_UPTO0:%.*]] = insertelement <2 x float> poison, float [[POWI_I0]], i64 0 +; CHECK-NEXT: [[POWI:%.*]] = insertelement <2 x float> [[POWI_UPTO0]], float [[POWI_I1]], i64 1 +; CHECK-NEXT: ret <2 x float> [[POWI]] +; + %powi = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %y) + ret <2 x float> %powi +} + define <2 x i32> @scalarize_smul_fix_sat_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: @scalarize_smul_fix_sat_v2i32( ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 @@ -243,6 +262,34 @@ define <2 x i32> @scalarize_llrint(<2 x float> %x) #0 { ret <2 x i32> %rnd } +define <2 x i32> @scalarize_lround(<2 x float> %x) #0 { +; CHECK-LABEL: @scalarize_lround( +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.lround.i32.f32(float [[X_I0]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.lround.i32.f32(float [[X_I1]]) +; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0 +; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1 +; CHECK-NEXT: ret <2 x i32> [[RND]] +; + %rnd = call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> %x) + ret <2 x i32> %rnd +} + +define <2 x i32> @scalarize_llround(<2 x float> %x) #0 { +; CHECK-LABEL: @scalarize_llround( +; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 +; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.llround.i32.f32(float [[X_I0]]) +; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 +; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.llround.i32.f32(float [[X_I1]]) +; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0 +; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1 +; CHECK-NEXT: ret <2 x i32> [[RND]] +; + %rnd = call <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float> %x) + ret <2 x i32> %rnd +} + define <2 x i1> @scalarize_is_fpclass(<2 x float> %x) #0 { ; CHECK-LABEL: @scalarize_is_fpclass( ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0