llvm · preames · Mar 13, 2024 · lukel97 · Mar 18, 2024 · lukel97
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -469,6 +469,22 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     return LT.first *
            getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
   case TTI::SK_InsertSubvector:
+    // If we're inserting a subvector of *exactly* m1 size at a sub-register
+    // boundary this is a subregister insert at worst and won't require the
+    // slideup.  We require the subvec to to be exactly VLEN as otherwise
+    // we'd have to account for tail elements in the m1 container if any.
+    // TODO: Extend for aligned m2, m4 inserts
+    // TODO: Extend for scalable subvector types
+    if (std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
+        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
+      const unsigned MinVLen = ST->getRealMinVLen();
+      const unsigned MaxVLen = ST->getRealMaxVLen();
+      if (MinVLen == MaxVLen &&
+          SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
+          SubLT.second.getSizeInBits() == MinVLen)
+        return TTI::TCC_Free;
+    }
+
     // Example sequence:
     // vsetivli     zero, 4, e8, mf2, tu, ma (ignored)
     // vslideup.vi  v8, v9, 2

diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll
@@ -527,15 +527,15 @@ define void @fixed_m1_in_m2_notail(<8 x i32> %src, <8 x i32> %passthru) vscale_r
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 11, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fixed_m1_in_m2_notail'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 11, i32 7>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>