From 310d453729eef8e182f310b5775f109ee0780f1c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 13 Mar 2024 15:29:26 -0700 Subject: [PATCH] [RISCV][TTI] Cost a subvector insert at a register boundary with exact vlen If we have exact vlen knowledge, we can figure out which indices correspond to register boundaries. Our lowering will use this knowledge to replace the vslideup.vi with a sub-register insert when the subvec passthru is undef. One case where the subvec passthru is known undef is when the subvec completely fills the subregister, and that's the easiest case to recognize during costing. Note: This is cost modeling a lowering which hasn't landed yet, see https://github.com/llvm/llvm-project/pull/84107. This change will not land until after that one does. This is another piece split off https://github.com/llvm/llvm-project/pull/80164 --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 16 ++++++++++++++++ .../CostModel/RISCV/shuffle-insert_subvector.ll | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 8f46fdc2f7ca9..34ffac8ab1650 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -469,6 +469,22 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind); case TTI::SK_InsertSubvector: + // If we're inserting a subvector of *exactly* m1 size at a sub-register + // boundary this is a subregister insert at worst and won't require the + // slideup. We require the subvec to be exactly VLEN as otherwise + // we'd have to account for tail elements in the m1 container if any. 
+ // TODO: Extend for aligned m2, m4 inserts + // TODO: Extend for scalable subvector types + if (std::pair SubLT = getTypeLegalizationCost(SubTp); + SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) { + const unsigned MinVLen = ST->getRealMinVLen(); + const unsigned MaxVLen = ST->getRealMaxVLen(); + if (MinVLen == MaxVLen && + SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 && + SubLT.second.getSizeInBits() == MinVLen) + return TTI::TCC_Free; + } + // Example sequence: // vsetivli zero, 4, e8, mf2, tu, ma (ignored) // vslideup.vi v8, v9, 2 diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll index a91d562b3f6f1..9b07e57752eec 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll @@ -527,7 +527,7 @@ define void @fixed_m1_in_m2_notail(<8 x i32> %src, <8 x i32> %passthru) vscale_r ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'fixed_m1_in_m2_notail' @@ -535,7 +535,7 @@ define void @fixed_m1_in_m2_notail(<8 x i32> %src, <8 x i32> %passthru) vscale_r ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = shufflevector <8 x 
i32> %src, <8 x i32> %passthru, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32>