From 3430862c4dac90d82bcb1eaccdd1f136b1dcc371 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 5 Sep 2023 09:14:02 -0700
Subject: [PATCH 1/2] [RISCV] Move slide and gather costing to subtarget [NFC]

As discussed during review of D159332. This PR doesn't actually common
up that copy of the code because doing so is not NFC - due to DLEN.
Fixing that will be a future PR.
---
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp      | 46 ++++++++++++
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |  7 ++
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 74 ++++---------------
 .../Target/RISCV/RISCVTargetTransformInfo.h   |  7 --
 4 files changed, 68 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index aa0275830e2a8..cc1b573244fd7 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -20,6 +20,7 @@
 #include "RISCVTargetMachine.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstructionCost.h"
 
 using namespace llvm;
 
@@ -163,6 +164,51 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
   return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
 }
 
+InstructionCost RISCVSubtarget::getLMULCost(MVT VT) const {
+  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
+  // implementation-defined.
+  if (!VT.isVector())
+    return InstructionCost::getInvalid();
+  unsigned DLenFactor = getDLenFactor();
+  unsigned Cost;
+  if (VT.isScalableVector()) {
+    unsigned LMul;
+    bool Fractional;
+    std::tie(LMul, Fractional) =
+        RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
+    if (Fractional)
+      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
+    else
+      Cost = (LMul * DLenFactor);
+  } else {
+    Cost = divideCeil(VT.getSizeInBits(), getRealMinVLen() / DLenFactor);
+  }
+  return Cost;
+}
+
+
+/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
+/// is generally quadratic in the number of vreg implied by LMUL. Note that
+/// operand (index and possibly mask) are handled separately.
+InstructionCost RISCVSubtarget::getVRGatherVVCost(MVT VT) const {
+  return getLMULCost(VT) * getLMULCost(VT);
+}
+
+/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
+/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVSubtarget::getVRGatherVICost(MVT VT) const {
+  return getLMULCost(VT);
+}
+
+/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
+/// for the type VT. (This does not cover the vslide1up or vslide1down
+/// variants.) Slides may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVSubtarget::getVSlideCost(MVT VT) const {
+  return getLMULCost(VT);
+}
+
 bool RISCVSubtarget::enableSubRegLiveness() const {
   // FIXME: Enable subregister liveness by default for RVV to better handle
   // LMUL>1 and segment load/store.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index cf64dbc21bd8a..e64fbc8a67680 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -221,6 +221,13 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   unsigned getMaxLMULForFixedLengthVectors() const;
   bool useRVVForFixedLengthVectors() const;
 
+  /// Return the cost of LMUL for linear operations.
+  InstructionCost getLMULCost(MVT VT) const;
+
+  InstructionCost getVRGatherVVCost(MVT VT) const;
+  InstructionCost getVRGatherVICost(MVT VT) const;
+  InstructionCost getVSlideCost(MVT VT) const;
+
   bool enableSubRegLiveness() const override;
 
   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 7cf8c7001e511..2178b2d457e0b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,28 +34,6 @@ static cl::opt<unsigned> SLPMaxVF(
         "exclusively by SLP vectorizer."),
     cl::Hidden);
 
-InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
-  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
-  // implementation-defined.
-  if (!VT.isVector())
-    return InstructionCost::getInvalid();
-  unsigned DLenFactor = ST->getDLenFactor();
-  unsigned Cost;
-  if (VT.isScalableVector()) {
-    unsigned LMul;
-    bool Fractional;
-    std::tie(LMul, Fractional) =
-        RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
-    if (Fractional)
-      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
-    else
-      Cost = (LMul * DLenFactor);
-  } else {
-    Cost = divideCeil(VT.getSizeInBits(), ST->getRealMinVLen() / DLenFactor);
-  }
-  return Cost;
-}
-
 InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                             TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
@@ -263,28 +241,6 @@ static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
   return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
 }
 
-/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
-/// is generally quadratic in the number of vreg implied by LMUL. Note that
-/// operand (index and possibly mask) are handled separately.
-InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) {
-  return getLMULCost(VT) * getLMULCost(VT);
-}
-
-/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
-/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
-/// or may track the vrgather.vv cost. It is implementation-dependent.
-InstructionCost RISCVTTIImpl::getVRGatherVICost(MVT VT) {
-  return getLMULCost(VT);
-}
-
-/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
-/// for the type VT. (This does not cover the vslide1up or vslide1down
-/// variants.) Slides may be linear in the number of vregs implied by LMUL,
-/// or may track the vrgather.vv cost. It is implementation-dependent.
-InstructionCost RISCVTTIImpl::getVSlideCost(MVT VT) {
-  return getLMULCost(VT);
-}
-
 InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                              VectorType *Tp, ArrayRef<int> Mask,
                                              TTI::TargetCostKind CostKind,
@@ -314,14 +270,14 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         // li a0, -1 (ignored)
         // vwmaccu.vx v10, a0, v9
         if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
-          return 2 * LT.first * getLMULCost(LT.second);
+          return 2 * LT.first * ST->getLMULCost(LT.second);
 
         if (Mask[0] == 0 || Mask[0] == 1) {
           auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
           // Example sequence:
           // vnsrl.wi v10, v8, 0
           if (equal(DeinterleaveMask, Mask))
-            return LT.first * getLMULCost(LT.second);
+            return LT.first * ST->getLMULCost(LT.second);
         }
       }
     }
@@ -332,7 +288,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         (LT.second.getVectorNumElements() <= 256)) {
       VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
-      return IndexCost + getVRGatherVVCost(LT.second);
+      return IndexCost + ST->getVRGatherVVCost(LT.second);
     }
     [[fallthrough]];
   }
@@ -350,7 +306,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
       InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
-      return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
+      return 2 * IndexCost + 2 * ST->getVRGatherVVCost(LT.second) + MaskCost;
     }
     [[fallthrough]];
   }
@@ -402,19 +358,19 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslidedown.vi v8, v9, 2
-    return LT.first * getVSlideCost(LT.second);
+    return LT.first * ST->getVSlideCost(LT.second);
   case TTI::SK_InsertSubvector:
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslideup.vi v8, v9, 2
-    return LT.first * getVSlideCost(LT.second);
+    return LT.first * ST->getVSlideCost(LT.second);
   case TTI::SK_Select: {
     // Example sequence:
     // li a0, 90
     // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
     // vmv.s.x v0, a0
     // vmerge.vvm v8, v9, v8, v0
-    return LT.first * 3 * getLMULCost(LT.second);
+    return LT.first * 3 * ST->getLMULCost(LT.second);
   }
   case TTI::SK_Broadcast: {
     bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -426,7 +382,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
       // vmv.v.x v8, a0
      // vmsne.vi v0, v8, 0
-      return LT.first * getLMULCost(LT.second) * 3;
+      return LT.first * ST->getLMULCost(LT.second) * 3;
     }
     // Example sequence:
     // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -437,24 +393,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // vmv.v.x v8, a0
       // vmsne.vi v0, v8, 0
-      return LT.first * getLMULCost(LT.second) * 6;
+      return LT.first * ST->getLMULCost(LT.second) * 6;
     }
 
     if (HasScalar) {
       // Example sequence:
       // vmv.v.x v8, a0
-      return LT.first * getLMULCost(LT.second);
+      return LT.first * ST->getLMULCost(LT.second);
     }
 
     // Example sequence:
     // vrgather.vi v9, v8, 0
-    return LT.first * getVRGatherVICost(LT.second);
+    return LT.first * ST->getVRGatherVICost(LT.second);
   }
   case TTI::SK_Splice:
     // vslidedown+vslideup.
     // TODO: Multiplying by LT.first implies this legalizes into multiple copies
     // of similar code, but I think we expand through memory.
-    return 2 * LT.first * getVSlideCost(LT.second);
+    return 2 * LT.first * ST->getVSlideCost(LT.second);
   case TTI::SK_Reverse: {
     // TODO: Cases to improve here:
     // * Illegal vector types
@@ -474,7 +430,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (LT.second.isFixedLengthVector())
       // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
       LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
-    InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second);
+    InstructionCost GatherCost = 2 + ST->getVRGatherVVCost(LT.second);
     // Mask operation additionally required extend and truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
     return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1393,7 +1349,7 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   // handles the LT.first term for us.
   if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
       LT.second.isVector())
-    BaseCost *= getLMULCost(LT.second);
+    BaseCost *= ST->getLMULCost(LT.second);
   return Cost + BaseCost;
 }
 
@@ -1641,7 +1597,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
   case ISD::FSUB:
   case ISD::FMUL:
   case ISD::FNEG: {
-    return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
+    return ConstantMatCost + ST->getLMULCost(LT.second) * LT.first * 1;
   }
   default:
     return ConstantMatCost +
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 8e86940d03a02..f836799649c26 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -48,9 +48,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   /// actual target hardware.
   unsigned getEstimatedVLFor(VectorType *Ty);
 
-  /// Return the cost of LMUL. The larger the LMUL, the higher the cost.
-  InstructionCost getLMULCost(MVT VT);
-
   /// Return the cost of accessing a constant pool entry of the specified
   /// type.
   InstructionCost getConstantPoolLoadCost(Type *Ty,
@@ -123,10 +120,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     return ST->useRVVForFixedLengthVectors() ? 16 : 0;
   }
 
-  InstructionCost getVRGatherVVCost(MVT VT);
-  InstructionCost getVRGatherVICost(MVT VT);
-  InstructionCost getVSlideCost(MVT VT);
-
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,

From 8f03ff844968a39658c918c75e7e1c78b81e9e64 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Thu, 7 Sep 2023 14:23:04 -0700
Subject: [PATCH 2/2] Address review feedback

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 45 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |  8 ++++
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp      | 46 -------------------
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |  7 ---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 30 ++++++------
 5 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 05e656ac81702..e551a226a7577 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2509,6 +2509,51 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
   return false;
 }
 
+InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
+  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
+  // implementation-defined.
+  if (!VT.isVector())
+    return InstructionCost::getInvalid();
+  unsigned DLenFactor = Subtarget.getDLenFactor();
+  unsigned Cost;
+  if (VT.isScalableVector()) {
+    unsigned LMul;
+    bool Fractional;
+    std::tie(LMul, Fractional) =
+        RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
+    if (Fractional)
+      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
+    else
+      Cost = (LMul * DLenFactor);
+  } else {
+    Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
+  }
+  return Cost;
+}
+
+
+/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
+/// is generally quadratic in the number of vreg implied by LMUL. Note that
+/// operand (index and possibly mask) are handled separately.
+InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
+  return getLMULCost(VT) * getLMULCost(VT);
+}
+
+/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
+/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
+  return getLMULCost(VT);
+}
+
+/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
+/// for the type VT. (This does not cover the vslide1up or vslide1down
+/// variants.) Slides may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
+  return getLMULCost(VT);
+}
+
 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
   // RISC-V FP-to-int conversions saturate to the destination register size, but
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c15725de506e3..44d421c9cae73 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -22,6 +22,7 @@
 #include <optional>
 
 namespace llvm {
+class InstructionCost;
 class RISCVSubtarget;
 struct RISCVRegisterInfo;
 namespace RISCVISD {
@@ -520,6 +521,13 @@ class RISCVTargetLowering : public TargetLowering {
   shouldExpandBuildVectorWithShuffles(EVT VT,
                                       unsigned DefinedValues) const override;
 
+  /// Return the cost of LMUL for linear operations.
+  InstructionCost getLMULCost(MVT VT) const;
+
+  InstructionCost getVRGatherVVCost(MVT VT) const;
+  InstructionCost getVRGatherVICost(MVT VT) const;
+  InstructionCost getVSlideCost(MVT VT) const;
+
   // Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index cc1b573244fd7..aa0275830e2a8 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -20,7 +20,6 @@
 #include "RISCVTargetMachine.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstructionCost.h"
 
 using namespace llvm;
 
@@ -164,51 +163,6 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
   return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
 }
 
-InstructionCost RISCVSubtarget::getLMULCost(MVT VT) const {
-  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
-  // implementation-defined.
-  if (!VT.isVector())
-    return InstructionCost::getInvalid();
-  unsigned DLenFactor = getDLenFactor();
-  unsigned Cost;
-  if (VT.isScalableVector()) {
-    unsigned LMul;
-    bool Fractional;
-    std::tie(LMul, Fractional) =
-        RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
-    if (Fractional)
-      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
-    else
-      Cost = (LMul * DLenFactor);
-  } else {
-    Cost = divideCeil(VT.getSizeInBits(), getRealMinVLen() / DLenFactor);
-  }
-  return Cost;
-}
-
-
-/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
-/// is generally quadratic in the number of vreg implied by LMUL. Note that
-/// operand (index and possibly mask) are handled separately.
-InstructionCost RISCVSubtarget::getVRGatherVVCost(MVT VT) const {
-  return getLMULCost(VT) * getLMULCost(VT);
-}
-
-/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
-/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
-/// or may track the vrgather.vv cost. It is implementation-dependent.
-InstructionCost RISCVSubtarget::getVRGatherVICost(MVT VT) const {
-  return getLMULCost(VT);
-}
-
-/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
-/// for the type VT. (This does not cover the vslide1up or vslide1down
-/// variants.) Slides may be linear in the number of vregs implied by LMUL,
-/// or may track the vrgather.vv cost. It is implementation-dependent.
-InstructionCost RISCVSubtarget::getVSlideCost(MVT VT) const {
-  return getLMULCost(VT);
-}
-
 bool RISCVSubtarget::enableSubRegLiveness() const {
   // FIXME: Enable subregister liveness by default for RVV to better handle
   // LMUL>1 and segment load/store.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index e64fbc8a67680..cf64dbc21bd8a 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -221,13 +221,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   unsigned getMaxLMULForFixedLengthVectors() const;
   bool useRVVForFixedLengthVectors() const;
 
-  /// Return the cost of LMUL for linear operations.
-  InstructionCost getLMULCost(MVT VT) const;
-
-  InstructionCost getVRGatherVVCost(MVT VT) const;
-  InstructionCost getVRGatherVICost(MVT VT) const;
-  InstructionCost getVSlideCost(MVT VT) const;
-
   bool enableSubRegLiveness() const override;
 
   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2178b2d457e0b..6b950cd8a49fc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -270,14 +270,14 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         // li a0, -1 (ignored)
        // vwmaccu.vx v10, a0, v9
         if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
-          return 2 * LT.first * ST->getLMULCost(LT.second);
+          return 2 * LT.first * TLI->getLMULCost(LT.second);
 
         if (Mask[0] == 0 || Mask[0] == 1) {
           auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
           // Example sequence:
           // vnsrl.wi v10, v8, 0
           if (equal(DeinterleaveMask, Mask))
-            return LT.first * ST->getLMULCost(LT.second);
+            return LT.first * TLI->getLMULCost(LT.second);
         }
       }
     }
@@ -288,7 +288,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
        (LT.second.getVectorNumElements() <= 256)) {
      VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
      InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
-      return IndexCost + ST->getVRGatherVVCost(LT.second);
+      return IndexCost + TLI->getVRGatherVVCost(LT.second);
     }
     [[fallthrough]];
   }
@@ -306,7 +306,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
       InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
-      return 2 * IndexCost + 2 * ST->getVRGatherVVCost(LT.second) + MaskCost;
+      return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
     }
     [[fallthrough]];
   }
@@ -358,19 +358,19 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslidedown.vi v8, v9, 2
-    return LT.first * ST->getVSlideCost(LT.second);
+    return LT.first * TLI->getVSlideCost(LT.second);
   case TTI::SK_InsertSubvector:
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslideup.vi v8, v9, 2
-    return LT.first * ST->getVSlideCost(LT.second);
+    return LT.first * TLI->getVSlideCost(LT.second);
   case TTI::SK_Select: {
     // Example sequence:
     // li a0, 90
     // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
     // vmv.s.x v0, a0
     // vmerge.vvm v8, v9, v8, v0
-    return LT.first * 3 * ST->getLMULCost(LT.second);
+    return LT.first * 3 * TLI->getLMULCost(LT.second);
   }
   case TTI::SK_Broadcast: {
     bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -382,7 +382,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
       // vmv.v.x v8, a0
       // vmsne.vi v0, v8, 0
-      return LT.first * ST->getLMULCost(LT.second) * 3;
+      return LT.first * TLI->getLMULCost(LT.second) * 3;
     }
     // Example sequence:
     // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -393,24 +393,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // vmv.v.x v8, a0
       // vmsne.vi v0, v8, 0
-      return LT.first * ST->getLMULCost(LT.second) * 6;
+      return LT.first * TLI->getLMULCost(LT.second) * 6;
     }
 
     if (HasScalar) {
       // Example sequence:
       // vmv.v.x v8, a0
-      return LT.first * ST->getLMULCost(LT.second);
+      return LT.first * TLI->getLMULCost(LT.second);
     }
 
     // Example sequence:
     // vrgather.vi v9, v8, 0
-    return LT.first * ST->getVRGatherVICost(LT.second);
+    return LT.first * TLI->getVRGatherVICost(LT.second);
   }
   case TTI::SK_Splice:
     // vslidedown+vslideup.
     // TODO: Multiplying by LT.first implies this legalizes into multiple copies
     // of similar code, but I think we expand through memory.
-    return 2 * LT.first * ST->getVSlideCost(LT.second);
+    return 2 * LT.first * TLI->getVSlideCost(LT.second);
   case TTI::SK_Reverse: {
     // TODO: Cases to improve here:
     // * Illegal vector types
@@ -430,7 +430,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (LT.second.isFixedLengthVector())
       // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
       LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
-    InstructionCost GatherCost = 2 + ST->getVRGatherVVCost(LT.second);
+    InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
     // Mask operation additionally required extend and truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
     return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1349,7 +1349,7 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   // handles the LT.first term for us.
   if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
       LT.second.isVector())
-    BaseCost *= ST->getLMULCost(LT.second);
+    BaseCost *= TLI->getLMULCost(LT.second);
   return Cost + BaseCost;
 }
 
@@ -1597,7 +1597,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
   case ISD::FSUB:
   case ISD::FMUL:
   case ISD::FNEG: {
-    return ConstantMatCost + ST->getLMULCost(LT.second) * LT.first * 1;
+    return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
   }
   default:
     return ConstantMatCost +