diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ab93f6a800775..d8cf831da2d29 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8622,17 +8622,20 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); } } + + // With an index of 0 this is a cast-like subvector, which can be performed + // with subregister operations. + if (OrigIdx == 0) + return Op; + + auto KnownVLen = Subtarget.getRealKnownVLen(); // If the subvector vector is a fixed-length type, we cannot use subregister - // manipulation to simplify the codegen; we don't know which register of a - // LMUL group contains the specific subvector as we only know the minimum - // register size. Therefore we must slide the vector group down the full - // amount. - if (SubVecVT.isFixedLengthVector()) { - // With an index of 0 this is a cast-like subvector, which can be performed - // with subregister operations. - if (OrigIdx == 0) - return Op; + // manipulation to simplify the codegen if we don't know VLEN; we don't know + // which register of a LMUL group contains the specific subvector as we only + // know the minimum register size. Therefore we must slide the vector group + // down the full amount. + if (SubVecVT.isFixedLengthVector() && !KnownVLen) { MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); @@ -8653,36 +8656,68 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, return DAG.getBitcast(Op.getValueType(), Slidedown); } + if (VecVT.isFixedLengthVector()) { + VecVT = getContainerForFixedLengthVector(VecVT); + Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget); + } + + // The semantics of extract_subvector are that if the extracted subvector is + // scalable, then the index is scaled by vscale. So if we have a fixed length + // subvector, we need to factor that in before we decompose it to + // subregisters... + MVT ContainerSubVecVT = SubVecVT; + unsigned EffectiveIdx = OrigIdx; + unsigned Vscale = *KnownVLen / RISCV::RVVBitsPerBlock; + if (SubVecVT.isFixedLengthVector()) { + assert(KnownVLen); + ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT); + EffectiveIdx = OrigIdx / Vscale; + } + unsigned SubRegIdx, RemIdx; std::tie(SubRegIdx, RemIdx) = RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( - VecVT, SubVecVT, OrigIdx, TRI); + VecVT, ContainerSubVecVT, EffectiveIdx, TRI); + + // ... and scale the remainder back afterwards. + if (SubVecVT.isFixedLengthVector()) + RemIdx = (RemIdx * Vscale) + (OrigIdx % Vscale); // If the Idx has been completely eliminated then this is a subvector extract // which naturally aligns to a vector register. These can easily be handled // using subregister manipulation. - if (RemIdx == 0) + if (RemIdx == 0) { + if (SubVecVT.isFixedLengthVector()) { + Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec); + return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget); + } return Op; + } - // Else we must shift our vector register directly to extract the subvector. - // Do this using VSLIDEDOWN. + // Else SubVecVT is a fractional LMUL and needs to be slid down. + assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second); // If the vector type is an LMUL-group type, extract a subvector equal to the - // nearest full vector register type. 
This should resolve to a EXTRACT_SUBREG
- // instruction.
+ // nearest full vector register type.
 MVT InterSubVT = VecVT;
 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
 InterSubVT = getLMUL1VT(VecVT);
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
- DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
+ Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
 }
 // Slide this vector register down by the desired number of elements in order
 // to place the desired subvector starting at element 0.
- SDValue SlidedownAmt =
- DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
+ SDValue SlidedownAmt;
+ if (SubVecVT.isFixedLengthVector())
+ SlidedownAmt = DAG.getConstant(RemIdx, DL, Subtarget.getXLenVT());
+ else
+ SlidedownAmt =
+ DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
+ if (SubVecVT.isFixedLengthVector())
+ VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
+
 SDValue Slidedown =
 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
 Vec, SlidedownAmt, Mask, VL);
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index cf64dbc21bd8a..86dd9d6861362 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -152,6 +152,11 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
 unsigned VLen = getMaxRVVVectorSizeInBits();
 return VLen == 0 ? 65536 : VLen;
 }
+ std::optional<unsigned> getRealKnownVLen() const {
+ if (getRealMinVLen() == getRealMaxVLen())
+ return getRealMinVLen();
+ return std::nullopt;
+ }
 RISCVABI::ABI getTargetABI() const { return TargetABI; }
 bool isSoftFPABI() const {
 return TargetABI == RISCVABI::ABI_LP64 ||
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index ed84a97c804f6..a89a0065be555 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,UNKNOWNVLEN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-vector-bits-max=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,KNOWNVLEN
 define void @extract_v2i8_v4i8_0(ptr %x, ptr %y) {
 ; CHECK-LABEL: extract_v2i8_v4i8_0:
@@ -62,22 +62,64 @@ define void @extract_v2i8_v8i8_6(ptr %x, ptr %y) {
 ret void
 }
-define void @extract_v2i32_v8i32_0(ptr %x, ptr %y) {
-; LMULMAX2-LABEL: extract_v2i32_v8i32_0:
-; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT: vle32.v v8, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-NEXT: vse32.v v8, (a1)
-; LMULMAX2-NEXT: ret
+define void @extract_v1i32_v8i32_4(ptr %x, ptr %y) {
+; UNKNOWNVLEN-LABEL: extract_v1i32_v8i32_4:
+; UNKNOWNVLEN: # %bb.0:
+; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 4 +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v1i32_v8i32_4: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v9, (a1) +; KNOWNVLEN-NEXT: ret + %a = load <8 x i32>, ptr %x + %c = call <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %a, i64 4) + store <1 x i32> %c, ptr %y + ret void +} + +define void @extract_v1i32_v8i32_5(ptr %x, ptr %y) { +; UNKNOWNVLEN-LABEL: extract_v1i32_v8i32_5: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 5 +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret ; -; LMULMAX1-LABEL: extract_v2i32_v8i32_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; KNOWNVLEN-LABEL: extract_v1i32_v8i32_5: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 1 +; KNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a1) +; KNOWNVLEN-NEXT: ret + %a = load <8 x i32>, ptr %x + %c = call <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %a, i64 5) + store <1 x i32> %c, ptr %y + ret void +} + +define void @extract_v2i32_v8i32_0(ptr %x, ptr %y) { +; CHECK-LABEL: extract_v2i32_v8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 0) store <2 x i32> %c, ptr %y @@ -85,52 +127,75 @@ define void @extract_v2i32_v8i32_0(ptr %x, ptr %y) { } define void @extract_v2i32_v8i32_2(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i32_v8i32_2: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a1) -; LMULMAX2-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_v8i32_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret ; -; LMULMAX1-LABEL: extract_v2i32_v8i32_2: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; 
KNOWNVLEN-LABEL: extract_v2i32_v8i32_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 2) store <2 x i32> %c, ptr %y ret void } +define void @extract_v2i32_v8i32_4(ptr %x, ptr %y) { +; UNKNOWNVLEN-LABEL: extract_v2i32_v8i32_4: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 4 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_v8i32_4: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v9, (a1) +; KNOWNVLEN-NEXT: ret + %a = load <8 x i32>, ptr %x + %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 4) + store <2 x i32> %c, ptr %y + ret void +} + define void @extract_v2i32_v8i32_6(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i32_v8i32_6: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 6 -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a1) -; LMULMAX2-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_v8i32_6: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 6 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret ; -; LMULMAX1-LABEL: extract_v2i32_v8i32_6: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; KNOWNVLEN-LABEL: extract_v2i32_v8i32_6: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 6) store <2 x i32> %c, ptr %y @@ -148,19 +213,87 @@ define void @extract_v2i32_nxv16i32_0( %x, ptr %y) { ret void } + +define void @extract_v2i32_nxv16i32_2( %x, ptr %y) { +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_2: +; KNOWNVLEN: # %bb.0: +; 
KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a0) +; KNOWNVLEN-NEXT: ret + %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 2) + store <2 x i32> %c, ptr %y + ret void +} + +define void @extract_v2i32_nxv16i32_4( %x, ptr %y) { +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_4: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 4 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_4: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v9, (a0) +; KNOWNVLEN-NEXT: ret + %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 4) + store <2 x i32> %c, ptr %y + ret void +} + define void @extract_v2i32_nxv16i32_6( %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_nxv16i32_6: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 6 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_6: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 6 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_6: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 6) store <2 x i32> %c, ptr %y ret void } +define void @extract_v2i32_nxv16i32_8( %x, ptr %y) { +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_8: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_8: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v10, (a0) +; KNOWNVLEN-NEXT: ret + %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 8) + store <2 x i32> %c, ptr %y + ret void +} + define void @extract_v2i8_nxv2i8_0( %x, ptr %y) { ; CHECK-LABEL: extract_v2i8_nxv2i8_0: ; CHECK: # %bb.0: @@ -185,47 +318,60 @@ define void @extract_v2i8_nxv2i8_2( %x, ptr %y) { ret void } +define void @extract_v2i8_nxv2i8_4( %x, ptr %y) { +; CHECK-LABEL: extract_v2i8_nxv2i8_4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %x, i64 4) + store <2 x i8> %c, ptr %y + ret void +} + +define void @extract_v2i8_nxv2i8_6( %x, ptr %y) { +; CHECK-LABEL: extract_v2i8_nxv2i8_6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 6 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %x, i64 6) + 
store <2 x i8> %c, ptr %y + ret void +} + define void @extract_v8i32_nxv16i32_8( %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i32_nxv16i32_8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m8, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 8 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v8i32_nxv16i32_8: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret ; -; LMULMAX1-LABEL: extract_v8i32_nxv16i32_8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m8, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v8, 8 -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 12 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v16, (a0) -; LMULMAX1-NEXT: ret +; KNOWNVLEN-LABEL: extract_v8i32_nxv16i32_8: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v10, (a0) +; KNOWNVLEN-NEXT: ret %c = call <8 x i32> @llvm.vector.extract.v8i32.nxv16i32( %x, i64 8) store <8 x i32> %c, ptr %y ret void } define void @extract_v8i1_v64i1_0(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i1_v64i1_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i1_v64i1_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i1_v64i1_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0) store <8 x i1> %c, ptr %y @@ -233,26 +379,16 @@ define void @extract_v8i1_v64i1_0(ptr %x, ptr %y) { } define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i1_v64i1_8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i1_v64i1_8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i1_v64i1_8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <8 x i1> 
@llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8) store <8 x i1> %c, ptr %y @@ -260,26 +396,16 @@ define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) { } define void @extract_v8i1_v64i1_48(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i1_v64i1_48: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a0, a0, 4 -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i1_v64i1_48: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 6 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i1_v64i1_48: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 6 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48) store <8 x i1> %c, ptr %y @@ -334,40 +460,49 @@ define void @extract_v8i1_nxv64i1_48( %x, ptr %y) { ret void } +define void @extract_v8i1_nxv64i1_128( %x, ptr %y) { +; CHECK-LABEL: extract_v8i1_nxv64i1_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v0, 16 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: ret + %c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1( %x, i64 128) + store <8 x i1> %c, ptr %y + ret void +} + +define void @extract_v8i1_nxv64i1_192( %x, ptr %y) { +; CHECK-LABEL: extract_v8i1_nxv64i1_192: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v0, 24 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: ret + %c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1( %x, i64 192) + store <8 x i1> %c, ptr %y + ret void +} define void @extract_v2i1_v64i1_0(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i1_v64i1_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v9, 0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v9, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i1_v64i1_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret 
+; CHECK-LABEL: extract_v2i1_v64i1_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmsne.vi v8, v9, 0 +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0) store <2 x i1> %c, ptr %y @@ -375,48 +510,49 @@ define void @extract_v2i1_v64i1_0(ptr %x, ptr %y) { } define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i1_v64i1_2: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX2-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v9, 0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v9, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_v64i1_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: li a2, 64 +; UNKNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vlm.v v0, (a0) +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret ; -; LMULMAX1-LABEL: extract_v2i1_v64i1_2: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; KNOWNVLEN-LABEL: extract_v2i1_v64i1_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: li a2, 64 +; KNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; KNOWNVLEN-NEXT: vlm.v v0, (a0) +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: 
vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2) store <2 x i1> %c, ptr %y @@ -424,50 +560,50 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { } define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i1_v64i1_42: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a0, a0, 4 -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 10 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX2-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v9, 0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v9, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_v64i1_42: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: li a2, 64 +; UNKNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vlm.v v0, (a0) +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: li a0, 42 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vx v8, v8, a0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret ; -; LMULMAX1-LABEL: extract_v2i1_v64i1_42: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 4 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 10 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; KNOWNVLEN-LABEL: extract_v2i1_v64i1_42: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: li 
a2, 64 +; KNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; KNOWNVLEN-NEXT: vlm.v v0, (a0) +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v10, 10 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42) store <2 x i1> %c, ptr %y @@ -538,76 +674,136 @@ define void @extract_v2i1_nxv64i1_0( %x, ptr %y) { } define void @extract_v2i1_nxv64i1_2( %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_nxv64i1_2: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_nxv64i1_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_nxv64i1_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1( %x, i64 2) store <2 x i1> %c, ptr %y ret void } define void @extract_v2i1_nxv64i1_42( %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_nxv64i1_42: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli 
a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: li a1, 42 -; CHECK-NEXT: vsetivli zero, 2, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_nxv64i1_42: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: li a1, 42 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vx v8, v8, a1 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_nxv64i1_42: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v10, 10 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1( %x, i64 42) store <2 x i1> %c, ptr %y ret void } define void @extract_v2i1_nxv32i1_26( %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_nxv32i1_26: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 26 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_nxv32i1_26: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 26 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; 
UNKNOWNVLEN-NEXT: vmv.v.i v8, 0
+; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0
+; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0
+; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
+; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8
+; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0
+; UNKNOWNVLEN-NEXT: vsm.v v8, (a0)
+; UNKNOWNVLEN-NEXT: ret
+;
+; KNOWNVLEN-LABEL: extract_v2i1_nxv32i1_26:
+; KNOWNVLEN: # %bb.0:
+; KNOWNVLEN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; KNOWNVLEN-NEXT: vmv.v.i v8, 0
+; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0
+; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 10
+; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0
+; KNOWNVLEN-NEXT: vmv.v.i v8, 0
+; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0
+; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; KNOWNVLEN-NEXT: vmv.v.i v9, 0
+; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
+; KNOWNVLEN-NEXT: vmv.v.v v9, v8
+; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0
+; KNOWNVLEN-NEXT: vsm.v v8, (a0)
+; KNOWNVLEN-NEXT: ret
 %c = call <2 x i1> @llvm.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %x, i64 26)
 store <2 x i1> %c, ptr %y
 ret void
@@ -640,6 +836,8 @@ declare <8 x i1> @llvm.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64
 declare <2 x i8> @llvm.vector.extract.v2i8.v4i8(<4 x i8> %vec, i64 %idx)
 declare <2 x i8> @llvm.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
+
+declare <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %vec, i64 %idx)
 declare <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)
 declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %vec, i64 %idx)
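
The KNOWNVLEN improvements above all follow from one observation: once the exact VLEN is known, a fixed-length extract index splits into a whole-vector-register part (resolved as a subregister of the LMUL group) and an in-register remainder (resolved by a short vslidedown). The following is a minimal standalone C++ sketch of that arithmetic, for illustration only; the names decomposeFixedExtract and ElemsPerVReg are hypothetical and are not the in-tree helpers (decomposeSubvectorInsertExtractToSubRegs works on scalable container types and register classes, which this sketch deliberately ignores).

// Standalone sketch (not part of the patch, not LLVM API): model of the
// subregister/slide decomposition used when the exact VLEN is known.
#include <cstdio>

struct Decomposition {
  unsigned SubRegIdx; // which register of the LMUL group holds the first element
  unsigned RemIdx;    // element offset inside that register (vslidedown amount)
};

// Assumes VLen is the exact vector register width in bits and SEW the element
// width in bits, so each vector register holds VLen / SEW elements.
static Decomposition decomposeFixedExtract(unsigned VLen, unsigned SEW,
                                           unsigned OrigIdx) {
  unsigned ElemsPerVReg = VLen / SEW;
  return {OrigIdx / ElemsPerVReg, OrigIdx % ElemsPerVReg};
}

int main() {
  // Mirrors extract_v2i32_v8i32_6 with -riscv-v-vector-bits-max=128: the
  // <8 x i32> source occupies an m2 group (v8/v9), and element 6 starts in
  // the second register at offset 2, hence the "vslidedown.vi v8, v9, 2" in
  // the KNOWNVLEN check lines above.
  Decomposition D = decomposeFixedExtract(128, 32, 6);
  std::printf("subreg %u, slide %u\n", D.SubRegIdx, D.RemIdx); // subreg 1, slide 2
  return 0;
}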