From 1da48f4a0d3748ee7950964ddb0dc46d29cd2f55 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Tue, 30 Jan 2024 12:05:41 +0000 Subject: [PATCH 1/3] [AArch64][CostModel] Improve scalar frem cost In AArch64 the cost of scalar frem is the cost of a call to 'fmod'. --- .../AArch64/AArch64TargetTransformInfo.cpp | 8 +++ .../CostModel/AArch64/arith-fp-frem.ll | 68 +++++++++---------- .../Analysis/CostModel/AArch64/arith-fp.ll | 22 +++--- 3 files changed, 53 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 6655931181c2d..5966ab83f056e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2972,6 +2972,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); + case ISD::FREM: + if (!Ty->isVectorTy()) { + Function *F = + CxtI == nullptr ? nullptr : CxtI->getModule()->getFunction("fmod"); + return getCallInstrCost(F, Ty, {Ty, Ty}, CostKind); + } + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); } } diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll index 20e0ef7ea3428..63149adfa2158 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll @@ -22,44 +22,44 @@ target triple = "aarch64-unknown-linux-gnu" define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; NEON-NO-VECLIB-LABEL: 'frem_f64' -; NEON-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; NEON-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; NEON-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; NEON-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem double %in, %in ; ; SVE-NO-VECLIB-LABEL: 'frem_f64' -; SVE-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; SVE-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; SVE-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; SVE-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem double %in, %in ; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in ; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem double %in, %in ; ; NEON-ARMPL-LABEL: 'frem_f64' -; NEON-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; NEON-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in ; ; NEON-SLEEF-LABEL: 'frem_f64' -; NEON-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; NEON-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in ; ; SVE-ARMPL-LABEL: 'frem_f64' -; SVE-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; SVE-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in ; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in ; SVE-ARMPL: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in ; ; SVE-SLEEF-LABEL: 'frem_f64' -; SVE-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; SVE-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in ; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in ; SVE-SLEEF: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in ; ; SVE-ARMPL-TAILFOLD-LABEL: 'frem_f64' -; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in ; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in ; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in ; ; SVE-SLEEF-TAILFOLD-LABEL: 'frem_f64' -; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in -; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in +; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem double %in, %in +; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 2 For instruction: %res = frem double %in, %in ; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in ; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in ; @@ -83,55 +83,55 @@ define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { define void @frem_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; NEON-NO-VECLIB-LABEL: 'frem_f32' -; NEON-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; NEON-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in -; NEON-NO-VECLIB: LV: Found an estimated cost of 20 for VF 4 For instruction: %res = frem float %in, %in +; NEON-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; NEON-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in +; NEON-NO-VECLIB: LV: Found an estimated cost of 52 for VF 4 For instruction: %res = frem float %in, %in ; ; SVE-NO-VECLIB-LABEL: 'frem_f32' -; SVE-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; SVE-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in -; SVE-NO-VECLIB: LV: Found an estimated cost of 20 for VF 4 For instruction: %res = frem float %in, %in +; SVE-NO-VECLIB: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; SVE-NO-VECLIB: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in +; SVE-NO-VECLIB: LV: Found an estimated cost of 52 for VF 4 For instruction: %res = frem float %in, %in ; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in ; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in ; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = frem float %in, %in ; ; NEON-ARMPL-LABEL: 'frem_f32' -; NEON-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; NEON-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in +; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; NEON-ARMPL: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in ; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in ; ; NEON-SLEEF-LABEL: 'frem_f32' -; NEON-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; NEON-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in +; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; NEON-SLEEF: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in ; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in ; ; SVE-ARMPL-LABEL: 'frem_f32' -; SVE-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; SVE-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in +; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; SVE-ARMPL: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in ; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in ; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in ; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in ; SVE-ARMPL: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in ; ; SVE-SLEEF-LABEL: 'frem_f32' -; SVE-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; SVE-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in +; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; SVE-SLEEF: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in ; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in ; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in ; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in ; SVE-SLEEF: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in ; ; SVE-ARMPL-TAILFOLD-LABEL: 'frem_f32' -; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in +; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in ; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in ; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in ; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in ; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in ; ; SVE-SLEEF-TAILFOLD-LABEL: 'frem_f32' -; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in -; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in +; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 1 For instruction: %res = frem float %in, %in +; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 24 for VF 2 For instruction: %res = frem float %in, %in ; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in ; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in ; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index c352892354fc2..497ade4f2f613 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -197,17 +197,17 @@ define i32 @fdiv(i32 %arg) { define i32 @frem(i32 %arg) { ; CHECK-LABEL: 'frem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F16 = frem <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8F16 = frem <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V16F16 = frem <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = frem <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = frem <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8F32 = frem <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = frem <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = frem <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F16 = frem half undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4F16 = frem <4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8F16 = frem <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V16F16 = frem <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = frem float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F32 = frem <2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4F32 = frem <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8F32 = frem <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = frem double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = frem <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F64 = frem <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = frem half undef, undef From 623e9be818f3533db4d83c2bf5c34d9d5ecfe253 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Tue, 6 Feb 2024 16:46:37 +0000 Subject: [PATCH 2/3] Pass nullptr to get costs of the call to fmod/fmodff --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 5966ab83f056e..ce8cd629bf501 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2973,11 +2973,10 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); case ISD::FREM: - if (!Ty->isVectorTy()) { - Function *F = - CxtI == nullptr ? nullptr : CxtI->getModule()->getFunction("fmod"); - return getCallInstrCost(F, Ty, {Ty, Ty}, CostKind); - } + // Pass nullptr as fmod/fmodf calls are emitted by the backend even when + // those functions are not delcared in the module. + if (!Ty->isVectorTy()) + return getCallInstrCost(/*Function*/ nullptr, Ty, {Ty, Ty}, CostKind); return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); } From cfcc272f3312fd0da1df9602cdef450632f16dbc Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Wed, 21 Feb 2024 15:02:46 +0000 Subject: [PATCH 3/3] Addressing reviewers --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ce8cd629bf501..010e569809e27 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2974,7 +2974,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( Op2Info); case ISD::FREM: // Pass nullptr as fmod/fmodf calls are emitted by the backend even when - // those functions are not delcared in the module. + // those functions are not declared in the module. if (!Ty->isVectorTy()) return getCallInstrCost(/*Function*/ nullptr, Ty, {Ty, Ty}, CostKind); return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,